% !Mode:: "TeX:UTF-8"
% !TEX encoding = UTF-8 Unicode

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% chapter 1------------------------------------------------------

@book{慧立2000大慈恩寺三藏法師傳,
  author    = {慧立 and 彦悰 and 道宣},
  title     = {大慈恩寺三藏法師傳},
  volume    = {2},
  publisher = {中华书局},
  year      = {2000},
}

@book{2019cns,
  author    = {中国翻译协会},
  title     = {2019中国语言服务行业发展报告},
  year      = {2019},
  publisher = {中国翻译协会},
}

@article{赵军峰2019深化改革,
  author  = {赵军峰 and 姚恺璇},
  title   = {深化改革 探讨创新 推进发展——全国翻译专业学位研究生教育2019年会综述},
  journal = {中国翻译},
  year    = {2019},
}

@book{knowlson1975universal,
  author    = {James {Knowlson}},
  title     = {Universal Language Schemes in England and France 1600-1800},
  publisher = {University of Toronto Press},
  year      = {1975},
}

@article{DBLP:journals/bstj/Shannon48,
  title   = {A mathematical theory of communication},
  author  = {Claude E. Shannon},
  journal = {Bell System Technical Journal},
  year    = {1948},
  volume  = {27},
  number  = {3},
  pages   = {379--423},
}

@book{shannon1949the,
  author    = {Claude E. {Shannon} and Warren {Weaver}},
  title     = {The Mathematical Theory of Communication},
  publisher = {University of Illinois Press},
  address   = {Urbana},
  year      = {1949},
}

@incollection{weaver1955translation,
  author    = {Weaver, Warren},
  title     = {Translation},
  booktitle = {Machine Translation of Languages: Fourteen Essays},
  editor    = {Locke, William N. and Booth, A. Donald},
  pages     = {15--23},
  publisher = {Technology Press, MIT},
  address   = {Cambridge, MA},
  year      = {1955},
}

@book{chomsky1957syntactic,
  author    = {Chomsky, Noam},
  title     = {Syntactic Structures},
  publisher = {Mouton},
  address   = {The Hague},
  year      = {1957},
}

@inproceedings{DBLP:conf/coling/SatoN90,
  title     = {Toward Memory-based Translation},
  author    = {Satoshi Sato and Makoto Nagao},
  year      = {1990},
  pages     = {247--252},
  publisher = {International Conference on Computational Linguistics},
}

@article{DBLP:journals/coling/BrownPPM94,
  title   = {The Mathematics of Statistical Machine Translation: Parameter Estimation},
  author  = {Peter F. Brown and Stephen Della Pietra and Vincent J. Della Pietra and Robert L. Mercer},
  journal = {Computational Linguistics},
  year    = {1993},
  volume  = {19},
  number  = {2},
  pages   = {263--311},
}

@article{nirenburg1989knowledge,
  author    = {Nirenburg, Sergei},
  title     = {Knowledge-based machine translation},
  journal   = {Machine Translation},
  publisher = {Springer},
  year      = {1989},
  volume    = {4},
  number    = {1},
  pages     = {5--24},
}

@book{hutchins1986machine,
  author    = {Hutchins, William John},
  title     = {Machine translation: past, present, future},
  publisher = {Ellis Horwood},
  address   = {Chichester},
  year      = {1986},
}

@article{zarechnak1979history,
  author  = {Zarechnak, Michael},
  title   = {The history of machine translation},
  journal = {Machine Translation},
  pages   = {1--87},
  year    = {1979},
}

@book{冯志伟2004机器翻译研究,
  author    = {冯志伟},
  title     = {机器翻译研究},
  year      = {2004},
  publisher = {中国对外翻译出版公司},
}

@article{王宝库1991机器翻译系统中一种规则描述语言,
  author  = {王宝库 and 张中义 and 姚天顺},
  title   = {机器翻译系统中一种规则描述语言(CTRDL)},
  journal = {中文信息学报},
  volume  = {5},
  number  = {4},
  year    = {1991},
}

@article{唐泓英1995基于搭配词典的词汇语义驱动算法,
  author  = {唐泓英 and 姚天顺},
  title   = {基于搭配词典的词汇语义驱动算法},
  journal = {软件学报},
  volume  = {6},
  number  = {A01},
  pages   = {78--85},
  year    = {1995},
}

@article{nagao1984framework,
  author  = {Nagao, Makoto},
  title   = {A framework of a mechanical translation between {Japanese} and {English} by analogy principle},
  journal = {Artificial and human intelligence},
  pages   = {351--354},
  year    = {1984},
}

@article{gale1993a,
  author  = {William A. {Gale} and Kenneth W. {Church}},
  title   = {A program for aligning sentences in bilingual corpora},
  journal = {Computational Linguistics},
  year    = {1993},
  volume  = {19},
  number  = {1},
  pages   = {75--102},
}

@article{Wu2016GooglesNM,
  title   = {Google's Neural Machine Translation System: Bridging the Gap between Human and Machine Translation},
  author  = {Yonghui Wu and Mike Schuster and Zhifeng Chen and Quoc V. Le and
             Mohammad Norouzi and Wolfgang Macherey and Maxim Krikun and Yuan Cao and
             Qin Gao and Klaus Macherey and Jeff Klingner and Apurva Shah and
             Melvin Johnson and Xiaobing Liu and Lukasz Kaiser and Stephan Gouws and
             Yoshikiyo Kato and Taku Kudo and Hideto Kazawa and Keith Stevens and
             George Kurian and Nishant Patil and Wei Wang and Cliff Young and
             Jason Smith and Jason Riesa and Alex Rudnick and Oriol Vinyals and
             Greg Corrado and Macduff Hughes and Jeffrey Dean},
  journal = {CoRR},
  year    = {2016},
  volume  = {abs/1609.08144},
}

@inproceedings{DBLP:journals/corr/LuongPM15,
  title     = {Effective Approaches to Attention-based Neural Machine Translation},
  author    = {Thang Luong and Hieu Pham and Christopher D. Manning},
  year      = {2015},
  pages     = {1412--1421},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
}

@inproceedings{DBLP:journals/corr/GehringAGYD17,
  title     = {Convolutional Sequence to Sequence Learning},
  author    = {Jonas Gehring and Michael Auli and David Grangier and Denis Yarats and Yann N. Dauphin},
  year      = {2017},
  volume    = {70},
  pages     = {1243--1252},
  publisher = {International Conference on Machine Learning},
}

@inproceedings{bahdanau2014neural,
  title     = {Neural Machine Translation by Jointly Learning to Align and Translate},
  author    = {Dzmitry Bahdanau and Kyunghyun Cho and Yoshua Bengio},
  year      = {2015},
  publisher = {International Conference on Learning Representations},
}

@inproceedings{NIPS2014_5346,
  title     = {Sequence to Sequence Learning with Neural Networks},
  author    = {Ilya Sutskever and Oriol Vinyals and Quoc V. Le},
  year      = {2014},
  pages     = {3104--3112},
  publisher = {Advances in Neural Information Processing Systems},
}

@book{koehn2009statistical,
  title     = {Statistical Machine Translation},
  author    = {Philipp Koehn},
  year      = {2010},
  publisher = {Cambridge University Press},
}

@book{DBLP:journals/corr/abs-1709-07809,
  title     = {Neural Machine Translation},
  author    = {Philipp Koehn},
  year      = {2020},
  publisher = {Cambridge University Press},
}

@book{宗成庆2013统计自然语言处理,
  author    = {宗成庆},
  title     = {统计自然语言处理},
  publisher = {清华大学出版社},
  year      = {2013},
}

@book{Goodfellow-et-al-2016,
  title     = {Deep Learning},
  author    = {Ian J. Goodfellow and Yoshua Bengio and Aaron C. Courville},
  year      = {2016},
  publisher = {{MIT} Press},
}

@article{goldberg2017neural,
  author    = {Goldberg, Yoav},
  title     = {Neural network methods for natural language processing},
  journal   = {Synthesis Lectures on Human Language Technologies},
  publisher = {Morgan \& Claypool Publishers},
  year      = {2017},
  volume    = {10},
  number    = {1},
  pages     = {1--309},
}

@book{周志华2016机器学习,
  author    = {周志华},
  title     = {机器学习},
  publisher = {清华大学出版社},
  year      = {2016},
}

@book{李航2019统计学习方法,
  author    = {李航},
  title     = {统计学习方法},
  publisher = {清华大学出版社},
  year      = {2019},
}

@book{邱锡鹏2020神经网络与深度学习,
  author    = {邱锡鹏},
  title     = {神经网络与深度学习},
  year      = {2020},
  publisher = {机械工业出版社},
}

%%%%% chapter 1------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% chapter 2------------------------------------------------------

@book{kolmogorov2018foundations,
  author    = {Kolmogorov, Andrei Nikolaevich and Bharucha-Reid, Albert T},
  title     = {Foundations of the theory of probability: Second English Edition},
  publisher = {Courier Dover Publications},
  year      = {2018},
}

% NOTE(review): the citation key suggests the 2011 textbook by 茆诗松 (Mao),
% but the author field names 魏宗舒 -- confirm which edition is meant.
@book{mao-prob-book-2011,
  author    = {魏宗舒},
  title     = {概率论与数理统计教程: 第二版},
  address   = {北京},
  publisher = {高等教育出版社},
  year      = {2011},
}

@book{resnick1992adventures,
  author    = {Resnick, Sidney I.},
  title     = {Adventures in Stochastic Processes},
  publisher = {Birkh{\"a}user},
  address   = {Boston},
  year      = {1992},
}

@book{liuke-markov-2004,
  author    = {刘克},
  title     = {实用马尔可夫决策过程},
  publisher = {清华大学出版社},
  year      = {2004},
}

@article{gale1995good,
  author  = {William A. Gale and Geoffrey Sampson},
  title   = {{Good-Turing} Frequency Estimation Without Tears},
  journal = {Journal of Quantitative Linguistics},
  volume  = {2},
  number  = {3},
  pages   = {217--237},
  year    = {1995},
}

@article{good1953population,
  author    = {Good, Irving J},
  title     = {The population frequencies of species and the estimation of population parameters},
  journal   = {Biometrika},
  publisher = {Oxford University Press},
  year      = {1953},
  volume    = {40},
  number    = {3-4},
  pages     = {237--264},
}

@inproceedings{kneser1995improved,
  author    = {Reinhard Kneser and Hermann Ney},
  title     = {Improved backing-off for {M-gram} language modeling},
  pages     = {181--184},
  publisher = {International Conference on Acoustics, Speech, and Signal Processing},
  year      = {1995},
}

@inproceedings{ney1991smoothing,
  author    = {Ney, Hermann and Essen, Ute},
  title     = {On smoothing techniques for bigram-based natural language modelling},
  year      = {1991},
  pages     = {825--828},
  publisher = {International Conference on Acoustics, Speech, and Signal Processing},
}

@inproceedings{stolcke2002srilm,
  title     = {{SRILM} - an extensible language modeling toolkit},
  author    = {Andreas Stolcke},
  year      = {2002},
  publisher = {International Conference on Spoken Language Processing},
}

@article{chen1999empirical,
  title   = {An empirical study of smoothing techniques for language modeling},
  author  = {Stanley F. Chen and Joshua Goodman},
  journal = {Computer Speech \& Language},
  year    = {1999},
  volume  = {13},
  number  = {4},
  pages   = {359--393},
}

@article{ney1994structuring,
  title   = {On structuring probabilistic dependences in stochastic language modelling},
  author  = {Hermann Ney and Ute Essen and Reinhard Kneser},
  journal = {Computer Speech \& Language},
  year    = {1994},
  volume  = {8},
  number  = {1},
  pages   = {1--38},
}

@book{parsing2009speech,
  title     = {Speech and language processing: an introduction to natural language processing, computational linguistics, and speech recognition, 2nd Edition},
  author    = {Dan Jurafsky and James H. Martin},
  year      = {2009},
  publisher = {Prentice Hall, Pearson Education International},
}

@book{DBLP:books/mg/CormenLR89,
  title     = {Introduction to Algorithms},
  author    = {Thomas H. Cormen and Charles E. Leiserson and Ronald L. Rivest},
  year      = {1989},
  publisher = {The {MIT} Press and McGraw-Hill Book Company},
}

@book{even2011graph,
  author    = {Even, Shimon},
  title     = {Graph algorithms},
  publisher = {Cambridge University Press},
  year      = {2011},
}

@article{tarjan1972depth,
  author  = {Robert Endre {Tarjan}},
  title   = {Depth-First Search and Linear Graph Algorithms},
  journal = {SIAM Journal on Computing},
  year    = {1972},
  volume  = {1},
  number  = {2},
  pages   = {146--160},
}

@article{DBLP:journals/ai/SabharwalS11,
  title   = {S. Russell, P. Norvig, Artificial Intelligence: {A} Modern Approach, Third Edition},
  author  = {Ashish Sabharwal and Bart Selman},
  journal = {Artificial Intelligence},
  year    = {2011},
  volume  = {175},
  number  = {5-6},
  pages   = {935--937},
}

@book{sahni1978fundamentals,
  author    = {Sartaj {Sahni} and Ellis {Horowitz}},
  title     = {Fundamentals of Computer Algorithms},
  publisher = {Computer Science Press},
  year      = {1978},
}

@article{hart1968a,
  author  = {Peter E. {Hart} and Nils J. {Nilsson} and Bertram {Raphael}},
  title   = {A Formal Basis for the Heuristic Determination of Minimum Cost Paths},
  journal = {IEEE Transactions on Systems Science and Cybernetics},
  year    = {1968},
  volume  = {4},
  number  = {2},
  pages   = {100--107},
}

@phdthesis{lowerre1976the,
  author = {Bruce T. {Lowerre}},
  title  = {The {HARPY} speech recognition system},
  school = {Carnegie Mellon University},
  year   = {1976},
}

@book{bishop1995neural,
  author    = {Christopher M. {Bishop}},
  title     = {Neural networks for pattern recognition},
  publisher = {Oxford university press},
  year      = {1995},
}

@article{åström1965optimal,
  author  = {Karl Johan {Åström}},
  title   = {Optimal control of {Markov} processes with incomplete state information},
  journal = {Journal of Mathematical Analysis and Applications},
  volume  = {10},
  number  = {1},
  pages   = {174--205},
  year    = {1965},
}

@article{korf1990real,
  author  = {Richard E. {Korf}},
  title   = {Real-time heuristic search},
  journal = {Artificial Intelligence},
  year    = {1990},
  volume  = {42},
  number  = {2},
  pages   = {189--211},
}

@inproceedings{DBLP:conf/emnlp/HuangZM17,
  author    = {Liang Huang and Kai Zhao and Mingbo Ma},
  title     = {When to Finish? Optimal Beam Search for Neural Text Generation (modulo beam size)},
  pages     = {2134--2139},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2017},
}

@inproceedings{DBLP:conf/emnlp/Yang0M18,
  author    = {Yilin Yang and Liang Huang and Mingbo Ma},
  title     = {Breaking the Beam Search Curse: {A} Study of (Re-)Scoring Methods and Stopping Criteria for Neural Machine Translation},
  pages     = {3054--3059},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2018},
}

@article{jelinek1980interpolated,
  author  = {Frederick {Jelinek} and Robert L. {Mercer}},
  title   = {Interpolated estimation of {Markov} source parameters from sparse data},
  journal = {Pattern Recognition in Practice},
  pages   = {381--397},
  year    = {1980},
}

@article{katz1987estimation,
  author  = {S. {Katz}},
  title   = {Estimation of probabilities from sparse data for the language model component of a speech recognizer},
  journal = {IEEE Transactions on Acoustics, Speech, and Signal Processing},
  year    = {1987},
  volume  = {35},
  number  = {3},
  pages   = {400--401},
}

@article{witten1991the,
  author  = {Ian H. {Witten} and Timothy C. {Bell}},
  title   = {The zero-frequency problem: estimating the probabilities of novel events in adaptive text compression},
  journal = {IEEE Transactions on Information Theory},
  volume  = {37},
  number  = {4},
  pages   = {1085--1094},
  year    = {1991},
}

@book{bell1990text,
  author    = {Timothy C. {Bell} and John G. {Cleary} and Ian H. {Witten}},
  title     = {Text compression},
  publisher = {Prentice Hall},
  year      = {1990},
}

@article{goodman2001a,
  author  = {Joshua T. {Goodman}},
  title   = {A bit of progress in language modeling},
  journal = {Computer Speech \& Language},
  year    = {2001},
  volume  = {15},
  number  = {4},
  pages   = {403--434},
}

@inproceedings{kirchhoff2005improved,
  author    = {Katrin {Kirchhoff} and Mei {Yang}},
  title     = {Improved Language Modeling for Statistical Machine Translation},
  year      = {2005},
  pages     = {125--128},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
}

@inproceedings{koehn2007factored,
  author    = {Philipp {Koehn} and Hieu {Hoang}},
  title     = {Factored Translation Models},
  year      = {2007},
  pages     = {868--876},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
}

@inproceedings{sarikaya2007joint,
  author    = {Ruhi {Sarikaya} and Yonggang {Deng}},
  title     = {Joint Morphological-Lexical Language Modeling for Machine Translation},
  year      = {2007},
  pages     = {145--148},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
}

@inproceedings{heafield2011kenlm,
  author    = {Kenneth {Heafield}},
  title     = {{KenLM}: Faster and Smaller Language Model Queries},
  pages     = {187--197},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2011},
}

@inproceedings{federico2006how,
  author    = {Marcello {Federico} and Nicola {Bertoldi}},
  title     = {How Many Bits Are Needed To Store Probabilities for Phrase-Based Translation?},
  year      = {2006},
  pages     = {94--101},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
}

@inproceedings{federico2007efficient,
  author    = {Marcello {Federico} and Mauro {Cettolo}},
  title     = {Efficient Handling of {N-gram} Language Models for Statistical Machine Translation},
  pages     = {88--95},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2007},
}

@inproceedings{talbot2007randomised,
  author    = {David {Talbot} and Miles {Osborne}},
  title     = {Randomised Language Modelling for Statistical Machine Translation},
  year      = {2007},
  pages     = {512--519},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
}

@inproceedings{talbot2007smoothed,
  author    = {David {Talbot} and Miles {Osborne}},
  title     = {Smoothed {Bloom} Filter Language Models: Tera-Scale {LMs} on the Cheap},
  pages     = {468--476},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2007},
}

@article{jing2019a,
  author  = {Kun {Jing} and Jungang {Xu}},
  title   = {A Survey on Neural Network Language Models},
  journal = {CoRR},
  volume  = {abs/1906.03591},
  year    = {2019},
}

@article{bengio2003a,
  author  = {Yoshua {Bengio} and Réjean {Ducharme} and Pascal {Vincent} and Christian {Janvin}},
  title   = {A neural probabilistic language model},
  journal = {Journal of Machine Learning Research},
  year    = {2003},
  volume  = {3},
  number  = {6},
  pages   = {1137--1155},
}

@inproceedings{mikolov2010recurrent,
  title     = {Recurrent neural network based language model},
  author    = {Tomas Mikolov and Martin Karafi{\'{a}}t and Luk{\'{a}}s Burget and Jan Cernock{\'{y}} and Sanjeev Khudanpur},
  year      = {2010},
  pages     = {1045--1048},
  publisher = {International Speech Communication Association},
}

@inproceedings{sundermeyer2012lstm,
  title     = {{LSTM} Neural Networks for Language Modeling},
  author    = {Martin Sundermeyer and Ralf Schl{\"{u}}ter and Hermann Ney},
  year      = {2012},
  pages     = {194--197},
  publisher = {International Speech Communication Association},
}

@inproceedings{vaswani2017attention,
  author    = {Ashish {Vaswani} and Noam {Shazeer} and Niki {Parmar} and Jakob {Uszkoreit} and Llion {Jones} and Aidan N. {Gomez} and Lukasz {Kaiser} and Illia {Polosukhin}},
  title     = {Attention is All You Need},
  pages     = {5998--6008},
  publisher = {Advances in Neural Information Processing Systems},
  year      = {2017},
}

@inproceedings{tillmann1997a,
  author    = {Christoph {Tillmann} and Stephan {Vogel} and Hermann {Ney} and Alex {Zubiaga}},
  title     = {A {DP-based} Search Using Monotone Alignments in Statistical Translation},
  pages     = {289--296},
  publisher = {Morgan Kaufmann Publishers},
  year      = {1997},
}

@inproceedings{DBLP:conf/acl/WangW97,
  title     = {Decoding Algorithm in Statistical Machine Translation},
  author    = {Ye-Yi Wang and Alex Waibel},
  year      = {1997},
  pages     = {366--372},
  publisher = {Morgan Kaufmann Publishers},
}

@inproceedings{DBLP:conf/acl/OchUN01,
  author    = {Franz Josef Och and Nicola Ueffing and Hermann Ney},
  title     = {An Efficient {A*} Search Algorithm for Statistical Machine Translation},
  publisher = {Proceedings of the {ACL} Workshop on Data-Driven Methods in Machine Translation},
  year      = {2001},
}

@inproceedings{germann2001fast,
  author    = {Ulrich {Germann} and Michael {Jahr} and Kevin {Knight} and Daniel {Marcu} and Kenji {Yamada}},
  title     = {Fast Decoding and Optimal Decoding for Machine Translation},
  year      = {2001},
  pages     = {228--235},
  publisher = {Morgan Kaufmann Publishers},
}

@inproceedings{germann2003greedy,
  author    = {Ulrich {Germann}},
  title     = {Greedy decoding for statistical machine translation in almost linear time},
  year      = {2003},
  pages     = {1--8},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
}

@inproceedings{Koehn2007Moses,
  title     = {Moses: Open Source Toolkit for Statistical Machine Translation},
  author    = {Philipp Koehn and Hieu Hoang and Alexandra Birch and Chris Callison-Burch and
               Marcello Federico and Nicola Bertoldi and Brooke Cowan and Wade Shen and
               Christine Moran and Richard Zens and Chris Dyer and Ondrej Bojar and
               Alexandra Constantin and Evan Herbst},
  year      = {2007},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
}

@inproceedings{DBLP:conf/amta/Koehn04,
  title     = {Pharaoh: {A} Beam Search Decoder for Phrase-Based Statistical Machine Translation Models},
  author    = {Philipp Koehn},
  year      = {2004},
  volume    = {3265},
  pages     = {115--124},
  publisher = {Springer},
}

@inproceedings{bangalore2001a,
  author    = {S. {Bangalore} and G. {Riccardi}},
  title     = {A finite-state approach to machine translation},
  year      = {2001},
  pages     = {381--388},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
}

@article{DBLP:journals/mt/BangaloreR02,
  title   = {Stochastic Finite-State Models for Spoken Language Machine Translation},
  author  = {Srinivas Bangalore and Giuseppe Riccardi},
  journal = {Machine Translation},
  year    = {2002},
  volume  = {17},
  number  = {3},
  pages   = {165--184},
}

@inproceedings{venugopal2007an,
  author    = {Ashish {Venugopal} and Andreas {Zollmann} and Stephan {Vogel}},
  title     = {An Efficient Two-Pass Approach to {Synchronous-CFG} Driven Statistical {MT}},
  pages     = {500--507},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2007},
}

@inproceedings{zollmann2007the,
  title     = {The Syntax Augmented {MT} {(SAMT)} System at the Shared Task for the 2007 {ACL} Workshop on Statistical Machine Translation},
  author    = {Andreas Zollmann and Ashish Venugopal and Matthias Paulik and Stephan Vogel},
  year      = {2007},
  pages     = {216--219},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
}

@inproceedings{liu2006tree,
  title     = {Tree-to-String Alignment Template for Statistical Machine Translation},
  author    = {Yang Liu and Qun Liu and Shouxun Lin},
  year      = {2006},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
}

@inproceedings{galley2006scalable,
  title     = {Scalable Inference and Training of Context-Rich Syntactic Translation Models},
  author    = {Michel Galley and Jonathan Graehl and Kevin Knight and Daniel Marcu and
               Steve DeNeefe and Wei Wang and Ignacio Thayer},
  year      = {2006},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
}

@inproceedings{chiang2005a,
  title     = {A Hierarchical Phrase-Based Model for Statistical Machine Translation},
  author    = {David Chiang},
  year      = {2005},
  pages     = {263--270},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
}

%%%%% chapter 2------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% chapter 3------------------------------------------------------

@inproceedings{ng2002discriminative,
  author    = {Ng, Andrew Y and Jordan, Michael I},
  title     = {On Discriminative vs. Generative Classifiers: {A} comparison of logistic regression and naive {Bayes}},
  pages     = {841--848},
  publisher = {{MIT} Press},
  year      = {2001},
}

@inproceedings{huang2008coling,
  title     = {Coling 2008: Advanced Dynamic Programming in Computational Linguistics: Theory, Algorithms and Applications-Tutorial notes},
  author    = {Huang, Liang},
  publisher = {International Conference on Computational Linguistics},
  year      = {2008},
}

@book{aho1972theory,
  author    = {Aho, Alfred V and Ullman, Jeffrey D},
  title     = {The theory of parsing, translation, and compiling},
  publisher = {Prentice-Hall},
  address   = {Englewood Cliffs, NJ},
  year      = {1973},
}

@inproceedings{rau1991extracting,
  author    = {Rau, Lisa F},
  title     = {Extracting company names from text},
  publisher = {IEEE Conference on Artificial Intelligence Application},
  pages     = {29--30},
  year      = {1991},
}

@article{张小衡1997中文机构名称的识别与分析,
  author  = {张小衡 and 王玲玲},
  title   = {中文机构名称的识别与分析},
  journal = {中文信息学报},
  volume  = {11},
  number  = {4},
  pages   = {22--33},
  year    = {1997},
}

@inproceedings{lample2016neural,
  title     = {Neural Architectures for Named Entity Recognition},
  author    = {Guillaume Lample and Miguel Ballesteros and Sandeep Subramanian and Kazuya Kawakami and Chris Dyer},
  year      = {2016},
  pages     = {260--270},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
}

@article{Baum1966Statistical,
  author  = {Baum, Leonard E and Petrie, Ted},
  title   = {Statistical Inference for Probabilistic Functions of Finite State {Markov} Chains},
  journal = {The Annals of Mathematical Statistics},
  volume  = {37},
  number  = {6},
  pages   = {1554--1563},
  year    = {1966},
}

@article{baum1970maximization,
  author  = {Baum, Leonard E and Petrie, Ted and Soules, George and Weiss, Norman},
  title   = {A maximization technique occurring in the statistical analysis of probabilistic functions of {Markov} chains},
  journal = {The Annals of Mathematical Statistics},
  volume  = {41},
  number  = {1},
  pages   = {164--171},
  year    = {1970},
}

@article{1977Maximum,
  author  = {Dempster, Arthur P and Laird, Nan M and Rubin, Donald B},
  title   = {Maximum likelihood from incomplete data via the {EM} algorithm},
  journal = {Journal of the Royal Statistical Society: Series B (Methodological)},
  volume  = {39},
  number  = {1},
  pages   = {1--22},
  year    = {1977},
}

@article{1967Error,
  author  = {Viterbi, Andrew},
  title   = {Error bounds for convolutional codes and an asymptotically optimum decoding algorithm},
  journal = {IEEE Transactions on Information Theory},
  volume  = {13},
  number  = {2},
  pages   = {260--269},
  year    = {1967},
}

@book{harrington2013机器学习实战,
  author    = {Harrington, Peter},
  title     = {机器学习实战},
  publisher = {人民邮电出版社},
  address   = {北京},
  year      = {2013},
}

@inproceedings{brants-2000-tnt,
  author    = {Brants, Thorsten},
  title     = {{TnT} -- {A} Statistical Part-of-Speech Tagger},
  pages     = {224--231},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2000},
}

@inproceedings{tsuruoka-tsujii-2005-chunk,
  author    = {Yoshimasa Tsuruoka and Jun'ichi Tsujii},
  title     = {Chunk Parsing Revisited},
  pages     = {133--140},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2005},
}

@inproceedings{li-etal-2003-news-oriented,
  author    = {Li, Sujian and Wang, Houfeng and Yu, Shiwen and Xin, Chengsheng},
  title     = {News-Oriented Automatic {Chinese} Keyword Indexing},
  pages     = {92--97},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2003},
}

@article{2015Bidirectional,
  author  = {Huang, Zhiheng and Xu, Wei and Yu, Kai},
  title   = {Bidirectional {LSTM-CRF} Models for Sequence Tagging},
  journal = {CoRR},
  volume  = {abs/1508.01991},
  year    = {2015},
}

@article{chiu2016named,
  author    = {Chiu, Jason PC and Nichols, Eric},
  title     = {Named entity recognition with bidirectional {LSTM-CNNs}},
  journal   = {Transactions of the Association for Computational Linguistics},
  volume    = {4},
  pages     = {357--370},
  year      = {2016},
  publisher = {MIT Press},
}

@inproceedings{vzukov2018named,
  author    = {{\v{Z}}ukov-Gregori{\v{c}}, Andrej and Bachrach, Yoram and Coope, Sam},
  title     = {Named Entity Recognition With Parallel Recurrent Neural Networks},
  pages     = {69--74},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2018},
}

@article{Li2020A,
  author  = {Li, Jing and Sun, Aixin and Han, Jianglei and Li, Chenliang},
  title   = {A Survey on Deep Learning for Named Entity Recognition},
  journal = {IEEE Transactions on Knowledge and Data Engineering},
  year    = {2020},
}

@inproceedings{devlin2019bert,
  author    = {Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina},
  title     = {{BERT}: Pre-training of deep bidirectional transformers for language understanding},
  pages     = {4171--4186},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2019},
}

@inproceedings{conneau2019unsupervised,
  title     = {Unsupervised Cross-lingual Representation Learning at Scale},
  author    = {Alexis Conneau and Kartikay Khandelwal and Naman Goyal and Vishrav Chaudhary and
               Guillaume Wenzek and Francisco Guzm{\'{a}}n and Edouard Grave and Myle Ott and
               Luke Zettlemoyer and Veselin Stoyanov},
  year      = {2020},
  pages     = {8440--8451},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
}

@book{chomsky1993lectures,
  author    = {Chomsky, Noam},
  title     = {Lectures on government and binding: The Pisa lectures},
  publisher = {Walter de Gruyter},
  year      = {1993},
}

@inproceedings{DBLP:conf/acl/SennrichHB16a,
  title     = {Neural Machine Translation of Rare Words with Subword Units},
  author    = {Rico Sennrich and Barry Haddow and Alexandra Birch},
  year      = {2016},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
}

@article{刘挺1998最大概率分词问题及其解法,
  author  = {刘挺 and 吴岩 and 王开铸},
  title   = {最大概率分词问题及其解法},
  journal = {哈尔滨工业大学学报},
  number  = {06},
  pages   = {37--41},
  year    = {1998},
}

@article{丁洁2010基于最大概率分词算法的中文分词方法研究,
  author  = {丁洁},
  title   = {基于最大概率分词算法的中文分词方法研究},
  journal = {科技信息},
  year    = {2010},
  number  = {21},
  pages   = {I0075--I0075},
}

@inproceedings{1995University,
  author    = {Kevin Humphreys and Robert J. Gaizauskas and Saliha Azzam and Charles Huyck and
               Brian Mitchell and Hamish Cunningham and Yorick Wilks},
  title     = {University of {Sheffield}: Description of the {LaSIE-II} system as used for {MUC-7}},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {1998},
}

@inproceedings{krupka1998isoquest,
  author    = {Krupka, George and Hausman, Kevin},
  title     = {{IsoQuest Inc.}: Description of the {NetOwl™} Extractor System as Used for {MUC-7}},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {1998},
}

@inproceedings{DBLP:conf/muc/BlackRM98,
  title     = {{FACILE:} Description of the {NE} System Used for {MUC-7}},
  author    = {Black, William J and Rinaldi, Fabio and Mowatt, David},
  year      = {1998},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
}

@article{1996Hidden,
  author  = {Eddy, Sean R},
  title   = {Hidden {Markov} models},
  journal = {Current Opinion in Structural Biology},
  volume  = {6},
  number  = {3},
  pages   = {361--365},
  year    = {1996},
}

% The venue is stored in the publisher field, as in the other proceedings
% entries of this file.
@inproceedings{lafferty2001conditional,
  author    = {John D. Lafferty and
               Andrew McCallum and
               Fernando C. N. Pereira},
  title     = {Conditional Random Fields: Probabilistic Models for Segmenting and
               Labeling Sequence Data},
  pages     = {282--289},
  publisher = {Proceedings of the Eighteenth International Conference on Machine
               Learning},
  year      = {2001},
}

@book{kapur1989maximum,
  author    = {Kapur, Jagat Narain},
  title     = {Maximum-entropy models in science and engineering},
  publisher = {John Wiley \& Sons},
  year      = {1989},
}

% Surname umlaut restored as a BibTeX special character; page range uses a
% double hyphen.
@article{1998Support,
  title={Support vector machines},
  author={Hearst, Marti A. and Dumais, Susan T and Osuna, Edgar and Platt, John and Sch{\"o}lkopf, Bernhard},
  journal={IEEE Intelligent Systems \& Their Applications},
  volume={13},
  number={4},
  pages={18--28},
  year={1998},
}

% Author list tidied (no stray padding around names); page range uses a
% double hyphen.
@article{2011Natural,
  title={Natural Language Processing (almost) from Scratch},
  author={Collobert, Ronan and Weston, Jason and Bottou, Léon and Karlen, Michael and Kavukcuoglu, Koray and Kuksa, Pavel},
  journal={Journal of Machine Learning Research},
  volume={12},
  number={1},
  pages={2493--2537},
  year={2011},
}

% Publisher name is a proper noun and is capitalised accordingly.
@book{manning2008introduction,
  title={Introduction to information retrieval},
  author={Manning, Christopher D and Sch{\"u}tze, Hinrich and Raghavan, Prabhakar},
  year={2008},
  publisher={Cambridge University Press}
}

% Journal title capitalised as a proper name.
@article{berger1996maximum,
  title={A maximum entropy approach to natural language processing},
  author={Berger, Adam and Della Pietra, Stephen A and Della Pietra, Vincent J},
  journal={Computational Linguistics},
  volume={22},
  number={1},
  pages={39--71},
  year={1996}
}

% A book entry takes a publisher, not a journal; the publisher is McGraw-Hill
% and the book appeared in 1997. The citation key is kept so existing \cite
% commands continue to resolve.
@book{mitchell1996m,
  title={Machine Learning},
  author={Mitchell, Tom},
  publisher={McGraw-Hill},
  year={1997}
}

@inproceedings{DBLP:conf/acl/OchN02,
  title={Discriminative Training and Maximum Entropy Models for Statistical Machine Translation},
  author={Franz Josef Och and Hermann Ney},
  publisher={Annual Meeting of the Association for Computational Linguistics},
  pages={295--302},
  year={2002}
}

% An incollection entry needs the title of the containing book.
@incollection{mohri2008speech,
  title={Speech recognition with weighted finite-state transducers},
  author={Mohri, Mehryar and Pereira, Fernando and Riley, Michael},
  booktitle={Springer Handbook of Speech Processing},
  pages={559--584},
  year={2008},
  publisher={Springer}
}

@article{bellman1966dynamic,
  author  = {Bellman, Richard},
  title   = {Dynamic programming},
  journal = {Science},
  volume  = {153},
  number  = {3731},
  pages   = {34--37},
  year    = {1966},
}

%%%%% chapter 3------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% chapter 4------------------------------------------------------
@inproceedings{DBLP:conf/acl/PapineniRWZ02,
  title={Bleu: a Method for Automatic Evaluation of Machine Translation},
  author={Kishore Papineni and Salim Roukos and Todd Ward and Wei-jing Zhu},
  publisher={Annual Meeting of the Association for Computational Linguistics},
  pages={311--318},
  year={2002},
}
% An article entry requires a journal; this paper appeared in the journal
% Machine Translation (the same journal name used by other entries here).
@article{DBLP:journals/mt/ChurchH93,
  title={Good applications for crummy machine translation},
  author={Church, Kenneth W and Hovy, Eduard H},
  journal={Machine Translation},
  volume={8},
  number={4},
  pages={239--258},
  year={1993},
  publisher={Springer}
}
@inproceedings{DBLP:conf/coling/SuWC92,
  title={A New Quantitative Quality Measure for Machine Translation Systems},
  author={Keh-Yih Su and Ming-Wen Wu and Jing-Shin Chang},
  publisher={International Conference on Computational Linguistics},
  pages={433--439},
  year={1992}
}
% The acronym DP is brace-protected so sentence-casing styles keep it upper-case.
@inproceedings{DBLP:conf/interspeech/TillmannVNZS97,
  title={Accelerated {DP} based search for statistical translation},
  author={Tillmann, Christoph and Vogel, Stephan and Ney, Hermann and Zubiaga, Arkaitz and Sawaf, Hassan},
  publisher={European Conference on Speech Communication and Technology},
  year={1997}
}
% The volume 200 / number 6 pair was an export artefact (the year 2006 split in
% two) and is dropped; the actual page range is recorded instead.
@inproceedings{snover2006study,
  title={A study of translation edit rate with targeted human annotation},
  author={Snover, Matthew and Dorr, Bonnie and Schwartz, Richard and Micciulla, Linnea and Makhoul, John},
  publisher={Proceedings of Association for Machine Translation in the Americas},
  pages={223--231},
  year={2006}
}
@inproceedings{DBLP:conf/muc/Chinchor92,
  title={{MUC-4} evaluation metrics},
  author={Nancy Chinchor},
  publisher={Annual Meeting of the Association for Computational Linguistics},
  pages={22--29},
  year={1992},
}
@inproceedings{DBLP:conf/emnlp/ChiangDCN08,
  title={Decomposability of Translation Metrics for Improved Evaluation and Efficient Algorithms},
  author={David Chiang and Steve DeNeefe and Yee Seng Chan and Hwee Tou Ng},
  publisher={Annual Meeting of the Association for Computational Linguistics},
  pages={610--619},
  year={2008},
}
@inproceedings{DBLP:conf/acl/BanerjeeL05,
  title={{METEOR:} An Automatic Metric for {MT} Evaluation with Improved Correlation with Human Judgments},
  author={Satanjeev Banerjee and Alon Lavie},
  publisher={Annual Meeting of the Association for Computational Linguistics},
  pages={65--72},
  year={2005},
}
@inproceedings{DBLP:conf/wmt/DenkowskiL10,
  title={{METEOR-NEXT} and the {METEOR} Paraphrase Tables: Improved Evaluation Support for Five Target Languages},
  author={Michael J. Denkowski and Alon Lavie},
  publisher={Annual Meeting of the Association for Computational Linguistics},
  pages={339--342},
  year={2010},
}
@inproceedings{DBLP:conf/wmt/DenkowskiL11,
  title={Meteor 1.3: Automatic Metric for Reliable Optimization and Evaluation of Machine Translation Systems},
  author={Michael J. Denkowski and Alon Lavie},
  publisher={Annual Meeting of the Association for Computational Linguistics},
  pages={85--91},
  year={2011}
}
@inproceedings{DBLP:conf/wmt/DenkowskiL14,
  title={Meteor Universal: Language Specific Translation Evaluation for Any Target Language},
  author={Michael J. Denkowski and Alon Lavie},
  publisher={Annual Meeting of the Association for Computational Linguistics},
  pages={376--380},
  year={2014},
}
% Journal name spelled out in full, matching the other Machine Translation
% journal entries in this file.
@article{DBLP:journals/mt/Shiwen93,
  author    = {Shiwen Yu},
  title     = {Automatic evaluation of output quality for Machine Translation systems},
  journal   = {Machine Translation},
  volume    = {8},
  number    = {1-2},
  pages     = {117--126},
  year      = {1993}
}
@inproceedings{DBLP:conf/coling/ZhouWLLZZ08,
  title={Diagnostic Evaluation of Machine Translation Systems Using Automatically Constructed Linguistic Check-Points},
  author={Ming Zhou and Bo Wang and Shujie Liu and Mu Li and Dongdong Zhang and Tiejun Zhao},
  publisher={International Conference on Computational Linguistics},
  pages={1121--1128},
  year={2008},
}
@inproceedings{DBLP:conf/acl/AlbrechtH07a,
  title={A Re-examination of Machine Learning Approaches for Sentence-Level {MT} Evaluation},
  author={Joshua Albrecht and Rebecca Hwa},
  publisher={Annual Meeting of the Association for Computational Linguistics},
  year={2007},
}
@inproceedings{DBLP:conf/acl/AlbrechtH07,
  title={Regression for Sentence-Level {MT} Evaluation with Pseudo References},
  author={Joshua Albrecht and Rebecca Hwa},
  publisher={Annual Meeting of the Association for Computational Linguistics},
  year={2007},
}
@inproceedings{DBLP:conf/naacl/LiuG07,
  title={Source-Language Features and Maximum Correlation Training for Machine Translation Evaluation},
  author={Ding Liu and Daniel Gildea},
  publisher={Annual Meeting of the Association for Computational Linguistics},
  pages={41--48},
  year={2007},
}
@inproceedings{DBLP:conf/ijcnlp/GimenezM08,
  title={Heterogeneous Automatic {MT} Evaluation Through Non-Parametric Metric Combinations},
  author={Jes{\'{u}}s Gim{\'{e}}nez and Llu{\'{\i}}s M{\`{a}}rquez},
  publisher={Annual Meeting of the Association for Computational Linguistics},
  pages={319--326},
  year={2008},
}
% The mixed-case system name is brace-protected so sentence-casing styles do
% not turn it into "Hyter".
@inproceedings{DBLP:conf/naacl/DreyerM12,
  author    = {Markus Dreyer and
               Daniel Marcu},
  title     = {{HyTER}: Meaning-Equivalent Semantics for Translation Evaluation},
  pages     = {162--171},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2012}
}
@inproceedings{DBLP:conf/tsd/BojarMTZ13,
  title={Scratching the Surface of Possible Translations},
  author={Ondrej Bojar and Matous Mach{\'{a}}cek and Ales Tamchyna and Daniel Zeman},
  publisher={Springer},
  volume={8082},
  pages={465--474},
  year={2013},
}
@inproceedings{DBLP:conf/eamt/QinS15,
  title={Truly Exploring Multiple References for Machine Translation Evaluation},
  author={Ying Qin and Lucia Specia},
  publisher={European Association for Machine Translation},
  year={2015},
}
@inproceedings{DBLP:conf/emnlp/SocherPHNM11,
  title={Semi-Supervised Recursive Autoencoders for Predicting Sentiment Distributions},
  author={Richard Socher and Jeffrey Pennington and Eric H. Huang and Andrew Y. Ng and Christopher D. Manning},
  publisher={Annual Meeting of the Association for Computational Linguistics},
  pages={151--161},
  year={2011},
}
@inproceedings{DBLP:conf/emnlp/SocherPWCMNP13,
  title={Recursive Deep Models for Semantic Compositionality Over a Sentiment Treebank},
  author={Richard Socher and Alex Perelygin and Jean Wu and Jason Chuang and Christopher D. Manning and Andrew Y. Ng and Christopher Potts},
  publisher={Annual Meeting of the Association for Computational Linguistics},
  pages={1631--1642},
  year={2013}
}
@article{DBLP:journals/corr/MatsuoKS17,
  title={Word-Alignment-Based Segment-Level Machine Translation Evaluation using Word Embeddings},
  author={Junki Matsuo and Mamoru Komachi and Katsuhito Sudoh},
  journal={CoRR},
  volume={abs/1704.00380},
  year={2017},
}
@article{DBLP:journals/csl/GuzmanJMN17,
  author  = {Guzm{\'a}n, Francisco and Joty, Shafiq and M{\`a}rquez, Llu{\'\i}s and Nakov, Preslav},
  title   = {Machine translation evaluation with neural networks},
  journal = {Computer Speech \& Language},
  volume  = {45},
  pages   = {180--200},
  year    = {2017},
}
% The acronym MT is brace-protected, as in the DBLP-sourced entries of this file.
@inproceedings{gamon2005sentence,
  title={Sentence-level {MT} evaluation without reference translations: Beyond language modeling},
  author={Gamon, Michael and Aue, Anthony and Smets, Martine},
  publisher={Proceedings of EAMT},
  pages={103--111},
  year={2005}
}
@inproceedings{DBLP:conf/lrec/Quirk04,
  title={Training a Sentence-Level Machine Translation Confidence Measure},
  author={Christopher Quirk},
  publisher={European Language Resources Association},
  year={2004},
}
@inproceedings{DBLP:conf/icassp/JonesGSGHRW05,
  title={Measuring human readability of machine generated text: three case studies in speech recognition and machine translation},
  author={Douglas A. Jones and Edward Gibson and Wade Shen and Neil Granoien and Martha Herzog and Douglas A. Reynolds and Clifford J. Weinstein},
  publisher={{IEEE}},
  pages={1009--1012},
  year={2005},
}
@inproceedings{DBLP:conf/eamt/ScartonZVGS15,
  title={Searching for Context: a Study on Document-Level Labels for Translation Quality Estimation},
  author={Carolina Scarton and Marcos Zampieri and Mihaela Vela and Josef van Genabith and Lucia Specia},
  publisher={European Association for Machine Translation},
  year={2015},
}
% Venue wording corrected ("Proceedings of the ..."); it is stored in the
% publisher field like the other proceedings entries in this file.
@inproceedings{DBLP:conf/interspeech/FetterDR96,
  title={Word graph rescoring using confidence measures},
  author={Fetter, Pablo and Dandurand, Fr{\'e}d{\'e}ric and Regel-Brietzmann, Peter},
  publisher={Proceedings of the Fourth International Conference on Spoken Language Processing},
  volume={1},
  pages={10--13},
  year={1996}
}
@inproceedings{DBLP:conf/acl/ShenCHHWSL16,
  title={Minimum Risk Training for Neural Machine Translation},
  author={Shiqi Shen and Yong Cheng and Zhongjun He and Wei He and Hua Wu and Maosong Sun and Yang Liu},
  publisher={Annual Meeting of the Association for Computational Linguistics},
  year={2016}
}
@inproceedings{DBLP:conf/wmt/FreitagCR19,
  title={{APE} at Scale and Its Implications on {MT} Evaluation Biases},
  author={Markus Freitag and Isaac Caswell and Scott Roy},
  publisher={Annual Meeting of the Association for Computational Linguistics},
  pages={34--44},
  year={2019},
}
@article{DBLP:journals/mt/BiciciGG13,
  author  = {Bi{\c{c}}ici, Ergun and Groves, Declan and van Genabith, Josef},
  title   = {Predicting sentence translation quality using extrinsic and language independent features},
  journal = {Machine Translation},
  volume  = {27},
  number  = {3-4},
  pages   = {171--192},
  year    = {2013},
}
@inproceedings{DBLP:conf/wmt/BiciciW14,
  title={Referential Translation Machines for Predicting Translation Quality},
  author={Ergun Bi{\c{c}}ici and Andy Way},
  publisher={Annual Meeting of the Association for Computational Linguistics},
  pages={313--321},
  year={2014},
}
@inproceedings{DBLP:conf/wmt/BiciciLW15a,
  title={Referential Translation Machines for Predicting Translation Quality and Related Statistics},
  author={Ergun Bi{\c{c}}ici and Qun Liu and Andy Way},
  publisher={Annual Meeting of the Association for Computational Linguistics},
  pages={304--308},
  year={2015},
}
@inproceedings{DBLP:conf/iclr/LogeswaranL18,
  title={An efficient framework for learning sentence representations},
  author={Lajanugen Logeswaran and Honglak Lee},
  publisher={OpenReview.net},
  year={2018},
}
@inproceedings{iyyer-etal-2015-deep,
  author    = {Iyyer, Mohit and Manjunatha, Varun and Boyd-Graber, Jordan and Daum{\'e} III, Hal},
  title     = {Deep Unordered Composition Rivals Syntactic Methods for Text Classification},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {1681--1691},
  month     = jul,
  year      = {2015},
}
% arXiv preprint: typed as an article with journal CoRR and the arXiv number in
% the volume field, the same shape as the other CoRR entry in this file. As an
% inproceedings entry the journal field was ignored and no venue was printed.
@article{DBLP:journals/corr/abs-1301-3781,
  author    = {Tomas Mikolov and
               Kai Chen and
               Greg Corrado and
               Jeffrey Dean},
  title     = {Efficient Estimation of Word Representations in Vector Space},
  journal   = {CoRR},
  volume    = {abs/1301.3781},
  year      = {2013}
}
% Conference name capitalised consistently with the other venue names in this file.
@inproceedings{DBLP:conf/icml/LeM14,
  title={Distributed representations of sentences and documents},
  author={Le, Quoc and Mikolov, Tomas},
  publisher={International Conference on Machine Learning},
  pages={1188--1196},
  year={2014}
}
@inproceedings{DBLP:conf/acl/AthiwaratkunW17,
  title={Multimodal Word Distributions},
  author={Ben Athiwaratkun and Andrew Gordon Wilson},
  publisher={Annual Meeting of the Association for Computational Linguistics},
  pages={1645--1656},
  year={2017},
}
@inproceedings{DBLP:conf/emnlp/PenningtonSM14,
  title={Glove: Global Vectors for Word Representation},
  author={Jeffrey Pennington and Richard Socher and Christopher D. Manning},
  publisher={Annual Meeting of the Association for Computational Linguistics},
  pages={1532--1543},
  year={2014},
}
% Venue name capitalised as a proper name.
@inproceedings{DBLP:conf/nips/KirosZSZUTF15,
  title={Skip-thought vectors},
  author={Kiros, Ryan and Zhu, Yukun and Salakhutdinov, Russ R and Zemel, Richard and Urtasun, Raquel and Torralba, Antonio and Fidler, Sanja},
  publisher={Advances in Neural Information Processing Systems},
  pages={3294--3302},
  year={2015}
}
@inproceedings{DBLP:conf/naacl/PetersNIGCLZ18,
  title={Deep Contextualized Word Representations},
  author={Matthew E. Peters and Mark Neumann and Mohit Iyyer and Matt Gardner and Christopher Clark and Kenton Lee and Luke Zettlemoyer},
  publisher={Annual Meeting of the Association for Computational Linguistics},
  pages={2227--2237},
  year={2018},
}
@misc{radford2018improving,
  author = {Radford, Alec and Narasimhan, Karthik and Salimans, Tim and Sutskever, Ilya},
  title  = {Improving language understanding by generative pre-training},
  year   = {2018},
}
% Journal name stored in full rather than in abbreviated form.
@article{DBLP:journals/mtcl/Carroll66,
  author    = {John B. Carroll},
  title     = {An experiment in evaluating the quality of translations},
  journal   = {Mechanical Translation and Computational Linguistics},
  volume    = {9},
  number    = {3-4},
  pages     = {55--66},
  year      = {1966}
}
% Acronyms in the title are brace-protected against sentence-casing styles.
@inproceedings{DBLP:conf/amta/WhiteOO94,
  title={The {ARPA} {MT} evaluation methodologies: evolution, lessons, and future approaches},
  author={White, John S and O’Connell, Theresa A and O’Mara, Francis E},
  publisher={Proceedings of the First Conference of the Association for Machine Translation in the Americas},
  year={1994}
}
% Acronyms in the title are brace-protected against sentence-casing styles.
@inproceedings{king2003femti,
  title={{FEMTI}: creating and using a framework for {MT} evaluation},
  author={King, Margaret and Popescu-Belis, Andrei and Hovy, Eduard},
  publisher={Proceedings of MT Summit IX, New Orleans, LA},
  pages={224--231},
  year={2003}
}
@article{DBLP:journals/mt/PrzybockiPBS09,
  title={The {NIST} 2008 Metrics for machine translation challenge - overview, methodology, metrics, and results},
  author={Mark A. Przybocki and Kay Peterson and Sebastien Bronsart and Gregory A. Sanders},
  journal={Machine Translation},
  volume={23},
  number={2-3},
  pages={71--103},
  year={2009},
}
% The acronym MT is brace-protected against sentence-casing styles.
@inproceedings{reeder2006direct,
  title={Direct application of a language learner test to {MT} evaluation},
  author={Reeder, Florence},
  publisher={Proceedings of AMTA},
  year={2006}
}
@inproceedings{DBLP:conf/wmt/Callison-BurchF07,
  title={(Meta-) Evaluation of Machine Translation},
  author={Chris Callison-Burch and Cameron S. Fordyce and Philipp Koehn and Christof Monz and Josh Schroeder},
  publisher={Annual Meeting of the Association for Computational Linguistics},
  pages={136--158},
  year={2007},
}
@inproceedings{DBLP:conf/wmt/Callison-BurchK12,
  title={Findings of the 2012 Workshop on Statistical Machine Translation},
  author={Chris Callison-Burch and Philipp Koehn and Christof Monz and Matt Post and Radu Soricut and Lucia Specia},
  publisher={Annual Meeting of the Association for Computational Linguistics},
  pages={10--51},
  year={2012},
}
@inproceedings{DBLP:conf/wmt/Lopez12,
  title={Putting Human Assessments of Machine Translation Systems in Order},
  author={Adam Lopez},
  publisher={Annual Meeting of the Association for Computational Linguistics},
  pages={1--9},
  year={2012},
}
@inproceedings{DBLP:conf/wmt/BojarCFHHHKLMNP15,
  title={Findings of the 2015 Workshop on Statistical Machine Translation},
  author={Ondrej Bojar and Rajen Chatterjee and Christian Federmann and Barry Haddow and Matthias Huck and Chris Hokamp and Philipp Koehn and Varvara Logacheva and Christof Monz and Matteo Negri and Matt Post and Carolina Scarton and Lucia Specia and Marco Turchi},
  publisher={Annual Meeting of the Association for Computational Linguistics},
  pages={1--46},
  year={2015},
}
@inproceedings{DBLP:conf/iwslt/Koehn12,
  title={Simulating human judgment in machine translation evaluation campaigns},
  author={Philipp Koehn},
  publisher={International Workshop on Spoken Language Translation},
  pages={179--184},
  year={2012},
}
@inproceedings{DBLP:conf/acl/LiuG05,
  title={Syntactic Features for Evaluation of Machine Translation},
  author={Ding Liu and Daniel Gildea},
  publisher={Annual Meeting of the Association for Computational Linguistics},
  pages={25--32},
  year={2005},
}
@inproceedings{DBLP:conf/wmt/GimenezM07a,
  title={Linguistic Features for Automatic Evaluation of Heterogenous {MT} Systems},
  author={Jes{\'{u}}s Gim{\'{e}}nez and Llu{\'{\i}}s M{\`{a}}rquez},
  publisher={Annual Meeting of the Association for Computational Linguistics},
  pages={256--264},
  year={2007},
}
@article{DBLP:journals/mt/PadoCGJM09,
  title={Measuring machine translation quality as semantic equivalence: {A} metric based on entailment features},
  author={Sebastian Pad{\'{o}} and Daniel M. Cer and Michel Galley and Dan Jurafsky and Christopher D. Manning},
  journal={Machine Translation},
  volume={23},
  number={2-3},
  pages={181--193},
  year={2009},
}
@inproceedings{DBLP:conf/ssst/OwczarzakGW07,
  title={Dependency-Based Automatic Evaluation for Machine Translation},
  author={Karolina Owczarzak and Josef van Genabith and Andy Way},
  publisher={Annual Meeting of the Association for Computational Linguistics},
  pages={80--87},
  year={2007}
}
@inproceedings{DBLP:conf/wmt/OwczarzakGW07,
  title={Labelled Dependencies in Machine Translation Evaluation},
  author={Karolina Owczarzak and Josef van Genabith and Andy Way},
  publisher={Annual Meeting of the Association for Computational Linguistics},
  pages={104--111},
  year={2007}
}
@inproceedings{DBLP:conf/coling/YuWXJLL14,
  title={{RED:} {A} Reference Dependency Based {MT} Evaluation Metric},
  author={Hui Yu and Xiaofeng Wu and Jun Xie and Wenbin Jiang and Qun Liu and Shouxun Lin},
  publisher={Annual Meeting of the Association for Computational Linguistics},
  pages={2042--2051},
  year={2014},
}
@inproceedings{DBLP:conf/wmt/PopovicN09,
  title={Syntax-Oriented Evaluation Measures for Machine Translation Output},
  author={Maja Popovic and Hermann Ney},
  publisher={Annual Meeting of the Association for Computational Linguistics},
  pages={29--32},
  year={2009},
}
@inproceedings{DBLP:conf/acl/BanchsL11,
  title={{AM-FM:} {A} Semantic Framework for Translation Quality Assessment},
  author={Rafael E. Banchs and Haizhou Li},
  publisher={Annual Meeting of the Association for Computational Linguistics},
  pages={153--158},
  year={2011},
}
% The association's name is "... Machine Translation in the Americas", as
% written in the other AMTA entries of this file; MT is brace-protected.
@inproceedings{reeder2006measuring,
  title={Measuring {MT} adequacy using latent semantic analysis},
  author={Reeder, Florence},
  publisher={Proceedings of the 7th Conference of the Association for Machine Translation in the Americas. Cambridge, Massachusetts},
  pages={176--184},
  year={2006}
}
@inproceedings{DBLP:conf/acl/LoBSW14,
  title={{XMEANT:} Better semantic {MT} evaluation without reference translations},
  author={Chi-kiu Lo and Meriem Beloucif and Markus Saers and Dekai Wu},
  publisher={Annual Meeting of the Association for Computational Linguistics},
  pages={765--771},
  year={2014},
}
@inproceedings{DBLP:conf/lrec/VilarXDN06,
  title={Error Analysis of Statistical Machine Translation Output},
  author={David Vilar and Jia Xu and Luis Fernando D'Haro and Hermann Ney},
  publisher={European Language Resources Association {(ELRA)}},
  pages={697--702},
  year={2006},
}
@inproceedings{popovic2011human,
  author    = {Popovic, Maja and Burchardt, Aljoscha and others},
  title     = {From human to automatic error classification for machine translation output},
  publisher = {European Association for Machine Translation},
  year      = {2011},
}
@article{DBLP:journals/mt/CostaLLCC15,
  title={A linguistically motivated taxonomy for Machine Translation error analysis},
  author={{\^{A}}ngela Costa and Wang Ling and Tiago Lu{\'{\i}}s and Rui Correia and Lu{\'{\i}}sa Coheur},
  journal={Machine Translation},
  volume={29},
  number={2},
  pages={127--161},
  year={2015},
}
% The acronym MT is brace-protected against sentence-casing styles.
@inproceedings{lommel2014using,
  title={Using a new analytic measure for the annotation and analysis of {MT} errors on real data},
  author={Lommel, Arle and Burchardt, Aljoscha and Popovic, Maja and Harris, Kim and Avramidis, Eleftherios and Uszkoreit, Hans},
  publisher={European Association for Machine Translation},
  pages={165--172},
  year={2014}
}
@inproceedings{DBLP:conf/wmt/PopovicGGLNMFB06,
  title={Morpho-syntactic Information for Automatic Error Analysis of Statistical Machine Translation Output},
  author={Maja Popovic and Adri{\`{a}} de Gispert and Deepa Gupta and Patrik Lambert and Hermann Ney and Jos{\'{e}} B. Mari{\~{n}}o and Marcello Federico and Rafael E. Banchs},
  publisher={Annual Meeting of the Association for Computational Linguistics},
  pages={1--6},
  year={2006},
}
@inproceedings{DBLP:conf/wmt/PopovicN07,
  title={Word Error Rates: Decomposition over {POS} classes and Applications for Error Analysis},
  author={Maja Popovic and Hermann Ney},
  publisher={Annual Meeting of the Association for Computational Linguistics},
  pages={48--55},
  year={2007},
}
% The tool name is brace-protected so sentence-casing styles keep its
% mixed-case spelling.
@inproceedings{DBLP:conf/acl/GonzalezMM13,
  author    = {Meritxell Gonz{\'{a}}lez and
               Laura Mascarell and
               Llu{\'{\i}}s M{\`{a}}rquez},
  title     = {{tSEARCH}: Flexible and Fast Search over Automatic Translations for
               Improved Quality/Error Analysis},
  pages     = {181--186},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2013}
}
% Venue and pages added; the venue is stored in the publisher field, following
% the convention of the other MT Summit IX entries in this file.
@inproceedings{coughlin2003correlating,
  title={Correlating automated and human assessments of machine translation quality},
  author={Coughlin, Deborah},
  publisher={MT Summit IX},
  pages={63--70},
  year={2003}
}
@inproceedings{popescu2003experiment,
  author    = {Popescu-Belis, Andrei},
  title     = {An experiment in comparative evaluation: humans vs. computers},
  publisher = {Proceedings of the Ninth Machine Translation Summit. New Orleans},
  year      = {2003},
}
@article{pearson1920notes,
  author    = {Pearson, Karl},
  title     = {Notes on the history of correlation},
  journal   = {Biometrika},
  publisher = {JSTOR},
  volume    = {13},
  number    = {1},
  pages     = {25--45},
  year      = {1920},
}
@inproceedings{culy2003limits,
  author    = {Culy, Christopher and Riehemann, Susanne Z},
  title     = {The limits of N-gram translation evaluation metrics},
  publisher = {MT Summit IX},
  pages     = {71--78},
  year      = {2003},
}
% This is a conference paper, so it is typed as a proceedings paper and the
% conference name is moved out of the journal field into the publisher field,
% which is where this file keeps proceedings venues.
@inproceedings{finch2004using,
  title={Using a paraphraser to improve machine translation evaluation},
  author={Finch, Andrew and Akiba, Yasuhiro and Sumita, Eiichiro},
  publisher={International Joint Conference on Natural Language Processing},
  year={2004}
}
% A COLING 2008 paper: the venue is the International Conference on
% Computational Linguistics, named as in the other COLING entries here.
@inproceedings{DBLP:conf/coling/HamonM08,
  author    = {Olivier Hamon and
               Djamel Mostefa},
  title     = {The Impact of Reference Quality on Automatic {MT} Evaluation},
  publisher = {International Conference on Computational Linguistics},
  pages     = {39--42},
  year      = {2008}
}
@inproceedings{doddington2002automatic,
  author    = {Doddington, George},
  title     = {Automatic evaluation of machine translation quality using n-gram co-occurrence statistics},
  publisher = {Proceedings of the second international conference on Human Language Technology Research},
  pages     = {138--145},
  year      = {2002},
}
% Title corrected ("Re-evaluating", not "Re-evaluation") and the BLEU acronym
% brace-protected.
@inproceedings{callison2006re,
  title={Re-evaluating the role of {BLEU} in machine translation research},
  author={Callison-Burch, Chris and Osborne, Miles and Koehn, Philipp},
  publisher={11th Conference of the European Chapter of the Association for Computational Linguistics},
  year={2006}
}
% Acronyms in the title are brace-protected; the entry type is written in
% lower case like the rest of the file.
@inproceedings{Miller:2005:MTS,
  author    = {Keith J. Miller and Michelle Vanni},
  title     = {Inter-rater Agreement Measures, and the Refinement of Metrics in the {PLATO} {MT} Evaluation Paradigm},
  publisher = {The tenth Machine Translation Summit},
  pages     = {125--132},
  year      = {2005}
}
@inproceedings{DBLP:conf/acl/Och03,
  title={Minimum Error Rate Training in Statistical Machine Translation},
  author={Franz Josef Och},
  publisher={Annual Meeting of the Association for Computational Linguistics},
  pages={160--167},
  year={2003}
}
@inproceedings{DBLP:conf/wmt/Post18,
  author    = {Matt Post},
  title     = {A Call for Clarity in Reporting {BLEU} Scores},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {186--191},
  year      = {2018}
}
% BLEU is an acronym: upper-cased and brace-protected.
@inproceedings{he2012maximum,
  title={Maximum expected {BLEU} training of phrase and lexicon translation models},
  author={He, Xiaodong and Deng, Li},
  publisher={Annual Meeting of the Association for Computational Linguistics},
  pages={292--301},
  year={2012}
}
@inproceedings{DBLP:conf/acl/ChenG15,
  author    = {Boxing Chen and Hongyu Guo},
  title     = {Representation Based Translation Evaluation Metrics},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {150--155},
  year      = {2015}
}
% The acronym MT is brace-protected against sentence-casing styles.
@inproceedings{kulesza2004learning,
  title={A learning approach to improving sentence-level {MT} evaluation},
  author={Kulesza, Alex and Shieber, Stuart},
  publisher={Proceedings of the 10th International Conference on Theoretical and Methodological Issues in Machine Translation},
  year={2004}
}
@inproceedings{corston2001machine,
  author    = {Corston-Oliver, Simon and Gamon, Michael and Brockett, Chris},
  title     = {A machine learning approach to the automatic evaluation of machine translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {148--155},
  year      = {2001},
}
% An article entry requires a journal; this paper appeared in the journal
% Machine Translation. The page field holds the full range, not only the
% first page.
@article{albrecht2008regression,
  title={Regression for machine translation evaluation at the sentence level},
  author={Albrecht, Joshua S and Hwa, Rebecca},
  journal={Machine Translation},
  volume={22},
  number={1-2},
  pages={1--27},
  year={2008},
  publisher={Springer}
}
@inproceedings{duh2008ranking,
  author    = {Duh, Kevin},
  title     = {Ranking vs. regression in machine translation evaluation},
  publisher = {Proceedings of the Third Workshop on Statistical Machine Translation},
  pages     = {191--194},
  year      = {2008},
}
@inproceedings{chen2015multi,
  author    = {Chen, Boxing and
               Guo, Hongyu and
               Kuhn, Roland},
  title     = {Multi-level evaluation for machine translation},
  publisher = {Proceedings of the Tenth Workshop on Statistical Machine Translation},
  pages     = {361--365},
  year      = {2015},
}
@book{DBLP:books/sp/EfronT93,
  author    = {Efron, Bradley and Tibshirani, Robert},
  title     = {An Introduction to the Bootstrap},
  publisher = {Springer},
  year      = {1993},
}
@inproceedings{DBLP:conf/emnlp/Koehn04,
  author    = {Koehn, Philipp},
  title     = {Statistical Significance Tests for Machine Translation Evaluation},
  publisher = {{ACL}},
  pages     = {388--395},
  year      = {2004},
}
@book{noreen1989computer,
  author    = {Noreen, Eric W.},
  title     = {Computer-Intensive Methods for Testing Hypotheses},
  publisher = {Wiley},
  address   = {New York},
  year      = {1989},
}
% The second author uses the "Last, Jr, First" form so that "III" is parsed
% as a generational suffix rather than as the surname.
@inproceedings{DBLP:conf/acl/RiezlerM05,
  author    = {Riezler, Stefan and
               Maxwell, III, John T.},
  title     = {On Some Pitfalls in Automatic Evaluation and Significance Testing for {MT}},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {57--64},
  year      = {2005},
}
@inproceedings{DBLP:conf/emnlp/Berg-KirkpatrickBK12,
  author    = {Berg-Kirkpatrick, Taylor and Burkett, David and Klein, Dan},
  title     = {An Empirical Investigation of Statistical Significance in {NLP}},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {995--1005},
  year      = {2012},
}
@inproceedings{DBLP:conf/wmt/Bicici13a,
  author    = {Bi{\c{c}}ici, Ergun},
  title     = {Referential Translation Machines for Quality Estimation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {343--351},
  year      = {2013},
}
@inproceedings{DBLP:conf/wmt/SouzaBTN13,
  author    = {de Souza, Jos{\'{e}} Guilherme Camargo and Buck, Christian and
               Turchi, Marco and Negri, Matteo},
  title     = {FBK-UEdin Participation to the {WMT13} Quality Estimation Shared Task},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {352--358},
  year      = {2013},
}
@inproceedings{DBLP:conf/wmt/SouzaGBTN14,
  author    = {de Souza, Jos{\'{e}} Guilherme Camargo and
               Gonz{\'{a}}lez-Rubio, Jes{\'{u}}s and Buck, Christian and
               Turchi, Marco and Negri, Matteo},
  title     = {FBK-UPV-UEdin participation in the {WMT14} Quality Estimation shared-task},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {322--328},
  year      = {2014},
}
@inproceedings{DBLP:conf/wmt/Espla-GomisSF15,
  author    = {Espl{\`{a}}-Gomis, Miquel and
               S{\'{a}}nchez-Mart{\'{\i}}nez, Felipe and
               Forcada, Mikel L.},
  title     = {UAlacant word-level machine translation quality estimation system at {WMT} 2015},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {309--315},
  year      = {2015},
}
@inproceedings{DBLP:conf/wmt/KreutzerSR15,
  author    = {Kreutzer, Julia and Schamoni, Shigehiko and Riezler, Stefan},
  title     = {QUality Estimation from ScraTCH {(QUETCH):} Deep Learning for Word-level Translation Quality Estimation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {316--322},
  year      = {2015},
}
@inproceedings{DBLP:conf/wmt/MartinsAHK16,
  author    = {Martins, Andr{\'{e}} F. T. and
               Astudillo, Ram{\'{o}}n Fern{\'{a}}ndez and
               Hokamp, Chris and
               Kepler, Fabio},
  title     = {Unbabel's Participation in the {WMT16} Word-Level Translation Quality Estimation Shared Task},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {806--811},
  year      = {2016},
}
@inproceedings{DBLP:conf/wmt/ChenTZXZLW17,
  author    = {Chen, Zhiming and Tan, Yiming and Zhang, Chenlin and
               Xiang, Qingyu and Zhang, Lilin and Li, Maoxi and
               Wang, Mingwen},
  title     = {Improving Machine Translation Quality Estimation with Neural Network Features},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {551--555},
  year      = {2017},
}
% Same paper as DBLP:conf/wmt/KreutzerSR15; keep the two entries in sync.
@inproceedings{kreutzer2015quality,
  author    = {Kreutzer, Julia and Schamoni, Shigehiko and Riezler, Stefan},
  title     = {Quality Estimation from Scratch ({QUETCH}): Deep Learning for Word-Level Translation Quality Estimation},
  publisher = {Proceedings of the Tenth Workshop on Statistical Machine Translation},
  pages     = {316--322},
  year      = {2015},
}
@inproceedings{DBLP:conf/wmt/ScartonBSSS16,
  author    = {Scarton, Carolina and Beck, Daniel and Shah, Kashif and
               Smith, Karin Sim and Specia, Lucia},
  title     = {Word embeddings and discourse information for Quality Estimation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {831--837},
  year      = {2016},
}
@inproceedings{DBLP:conf/wmt/AbdelsalamBE16,
  author    = {Abdelsalam, Amal and Bojar, Ondrej and El-Beltagy, Samhaa},
  title     = {Bilingual Embeddings and Word Alignments for Translation Quality Estimation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {764--771},
  year      = {2016},
}
@inproceedings{DBLP:conf/wmt/BasuPN18,
  author    = {Basu, Prasenjit and Pal, Santanu and Naskar, Sudip Kumar},
  title     = {Keep It or Not: Word Level Quality Estimation for Post-Editing},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {759--764},
  year      = {2018},
}
@inproceedings{DBLP:conf/wmt/Lo19,
  author    = {Lo, Chi-kiu},
  title     = {YiSi - a Unified Semantic {MT} Quality Evaluation and Estimation Metric for Languages with Different Levels of Available Resources},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {507--513},
  year      = {2019},
}
@inproceedings{DBLP:conf/wmt/YankovskayaTF19,
  author    = {Yankovskaya, Elizaveta and T{\"{a}}ttar, Andre and Fishel, Mark},
  title     = {Quality Estimation and Translation Metrics via Pre-trained Word and Sentence Embeddings},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {101--105},
  year      = {2019},
}
@inproceedings{DBLP:conf/wmt/Qi19,
  author    = {Qi, Hou},
  title     = {{NJU} Submissions for the {WMT19} Quality Estimation Shared Task},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {95--100},
  year      = {2019},
}
@inproceedings{DBLP:conf/wmt/ZhouZH19,
  author    = {Zhou, Junpei and Zhang, Zhisong and Hu, Zecong},
  title     = {{SOURCE:} SOURce-Conditional Elmo-style Model for Machine Translation Quality Estimation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {106--111},
  year      = {2019},
}
@inproceedings{DBLP:conf/wmt/Hokamp17,
  author    = {Hokamp, Chris},
  title     = {Ensembling Factored Neural Machine Translation Models for Automatic Post-Editing and Quality Estimation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {647--654},
  year      = {2017},
}
@inproceedings{DBLP:conf/wmt/KimLKN19,
  author    = {Kim, Hyun and Lim, Joon-Ho and Kim, Hyun-Ki and Na, Seung-Hoon},
  title     = {{QE} {BERT:} Bilingual {BERT} Using Multi-task Learning for Neural Quality Estimation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {85--89},
  year      = {2019},
}
@inproceedings{DBLP:conf/wmt/HildebrandV13,
  author    = {Hildebrand, Silja and
               Vogel, Stephan},
  title     = {{MT} Quality Estimation: The {CMU} System for {WMT}'13},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {373--379},
  year      = {2013},
}
% WMT19 shared-task paper: typed as a conference paper and given the same
% venue string as the other WMT19 entries in this file.
@inproceedings{kepler2019unbabel,
  author    = {Kepler, F{\'a}bio and
               Tr{\'e}nous, Jonay and
               Treviso, Marcos and
               Vera, Miguel and
               G{\'o}is, Ant{\'o}nio and
               Farajian, M. Amin and
               Lopes, Ant{\'o}nio V. and
               Martins, Andr{\'e} F. T.},
  title     = {Unbabel's Participation in the {WMT19} Translation Quality Estimation Shared Task},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {78--84},
  year      = {2019},
}
% Same paper as DBLP:conf/wmt/MartinsAHK16; keep the two entries in sync.
@inproceedings{martins2016unbabel,
  author    = {Martins, Andr{\'e} F. T. and
               Astudillo, Ram{\'o}n Fern{\'a}ndez and
               Hokamp, Chris and
               Kepler, Fabio},
  title     = {Unbabel's Participation in the {WMT16} Word-Level Translation Quality Estimation Shared Task},
  publisher = {Proceedings of the First Conference on Machine Translation},
  pages     = {806--811},
  year      = {2016},
}
@inproceedings{DBLP:conf/wmt/ShahLPBBBS15,
  author    = {Shah, Kashif and Logacheva, Varvara and Paetzold, Gustavo and
               Blain, Fr{\'{e}}d{\'{e}}ric and Beck, Daniel and
               Bougares, Fethi and Specia, Lucia},
  title     = {{SHEF-NN:} Translation Quality Estimation with Neural Networks},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {342--347},
  year      = {2015},
}
@book{huang2019machine,
  author    = {Huang, Shujian and Knight, Kevin},
  title     = {Machine Translation: 15th China Conference, CCMT 2019, Nanchang, China, September 27--29, 2019, Revised Selected Papers},
  publisher = {Springer Nature},
  volume    = {1104},
  year      = {2019},
}
@article{akaike1974new,
  author    = {Akaike, Hirotugu},
  title     = {A New Look at the Statistical Model Identification},
  journal   = {IEEE Transactions on Automatic Control},
  volume    = {19},
  number    = {6},
  pages     = {716--723},
  year      = {1974},
  publisher = {IEEE},
}
@inproceedings{wang2019niutrans,
  author    = {Wang, Ziyang and
               Liu, Hui and
               Chen, Hexuan and
               Feng, Kai and
               Wang, Zeyang and
               Li, Bei and
               Xu, Chen and
               Xiao, Tong and
               Zhu, Jingbo},
  title     = {NiuTrans Submission for CCMT19 Quality Estimation Task},
  publisher = {Springer},
  pages     = {82--92},
  year      = {2019},
}
@book{jurafsky2000speech,
  author    = {Jurafsky, Dan and Martin, James H.},
  title     = {Speech \& Language Processing},
  publisher = {Pearson Education India},
  year      = {2000},
}
%%%%% chapter 4------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% chapter 5------------------------------------------------------
% The two-word surname "Della Pietra" is written in "Last, First" form so it
% is not split into a middle name plus "Pietra".
@article{brown1990statistical,
  author    = {Brown, Peter F. and
               Cocke, John and
               Della Pietra, Stephen and
               Della Pietra, Vincent J. and
               Jelinek, Frederick and
               Lafferty, John D. and
               Mercer, Robert L. and
               Roossin, Paul S.},
  title     = {A Statistical Approach to Machine Translation},
  journal   = {Computational Linguistics},
  volume    = {16},
  number    = {2},
  pages     = {79--85},
  year      = {1990},
}

@article{knight1999decoding,
  author  = {Knight, Kevin},
  title   = {Decoding Complexity in Word-Replacement Translation Models},
  journal = {Computational Linguistics},
  year    = {1999},
  volume  = {25},
  number  = {4},
  pages   = {607--615},
}
@article{shannon1949communication,
  author  = {Shannon, Claude Elwood},
  title   = {Communication Theory of Secrecy Systems},
  journal = {Bell System Technical Journal},
  volume  = {28},
  number  = {4},
  pages   = {656--715},
  year    = {1949},
}
@inproceedings{DBLP:conf/acl/Moore04,
  author    = {Moore, Robert C.},
  title     = {Improving {IBM} Word Alignment Model 1},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {518--525},
  year      = {2004},
}
% NOTE(review): the page range was stored in the number field; the real issue
% number is not recorded here -- confirm it against the journal and add it.
@article{肖桐1991面向统计机器翻译的重对齐方法研究,
  author  = {肖桐 and
             李天宁 and
             陈如山 and
             朱靖波 and
             王会珍},
  title   = {面向统计机器翻译的重对齐方法研究},
  journal = {中文信息学报},
  volume  = {24},
  pages   = {110--116},
  year    = {2010},
}

@inproceedings{2005Improvin,
  author    = {Wu, Hua and Wang, Haifeng},
  title     = {Improving Statistical Word Alignment with Ensemble Methods},
  publisher = {International Joint Conference on Natural Language Processing},
  volume    = {3651},
  pages     = {462--473},
  year      = {2005},
}
% NOTE(review): typed as an article but has no journal field; the university
% publisher suggests a thesis or technical report -- TODO confirm the type.
% NOTE(review): the key says 1998 while the year field says 1999 -- verify.
@article{1998Grammar,
  title={Grammar Inference and Statistical Machine Translation},
  author={Ye-Yi Wang and Wayne Ward},
  year={1999},
  publisher={Carnegie Mellon University}
}

@inproceedings{DBLP:conf/acl-vlc/DaganCG93,
  author    = {Dagan, Ido and
               Church, Kenneth Ward and
               Gale, William A.},
  title     = {Robust Bilingual Word Alignment for Machine Aided Translation},
  publisher = {Very Large Corpora},
  year      = {1993},
}

@inproceedings{DBLP:conf/naacl/GaleC91,
  author    = {Gale, William A. and Church, Kenneth Ward},
  title     = {Identifying Word Correspondences in Parallel Texts},
  year      = {1991},
  publisher = {Morgan Kaufmann},
}
@inproceedings{DBLP:conf/naacl/LiangTK06,
  author    = {Liang, Percy and Taskar, Benjamin and Klein, Dan},
  title     = {Alignment by Agreement},
  year      = {2006},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
}
@inproceedings{DBLP:conf/naacl/DyerCS13,
  author    = {Dyer, Chris and Chahuneau, Victor and Smith, Noah A.},
  title     = {A Simple, Fast, and Effective Reparameterization of {IBM} Model 2},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {644--648},
  year      = {2013},
}
@inproceedings{DBLP:conf/acl/DeNeroK07,
  author    = {DeNero, John and Klein, Dan},
  title     = {Tailoring Word Alignments to Syntactic Machine Translation},
  year      = {2007},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
}

@inproceedings{paul2007all,
  author    = {Davis, Paul C. and
               Xie, Zhuli and
               Small, Kevin},
  title     = {All Links are not the Same: Evaluating Word Alignments for Statistical Machine Translation},
  publisher = {Machine Translation Summit XI},
  year      = {2007},
}

% NOTE(review): the page range was stored in the number field; the real issue
% number is not recorded here -- confirm it against the journal and add it.
@article{黄书剑2009一种错误敏感的词对齐评价方法,
  author  = {黄书剑 and
             奚宁 and
             赵迎功 and
             戴新宇 and
             陈家骏},
  title   = {一种错误敏感的词对齐评价方法},
  journal = {中文信息学报},
  volume  = {23},
  pages   = {88--94},
  year    = {2009},
}
@article{DBLP:journals/coling/FraserM07,
  author  = {Fraser, Alexander and Marcu, Daniel},
  title   = {Measuring Word Alignment Quality for Statistical Machine Translation},
  journal = {Computational Linguistics},
  year    = {2007},
  volume  = {33},
  number  = {3},
  pages   = {293--303},
}
@article{DBLP:journals/corr/FengLLZ16,
  author  = {Feng, Shi and Liu, Shujie and Li, Mu and Zhou, Ming},
  title   = {Implicit Distortion and Fertility Models for Attention-based Encoder-Decoder {NMT} Model},
  journal = {CoRR},
  volume  = {abs/1601.03317},
  year    = {2016},
}
@inproceedings{DBLP:conf/coling/UdupaFM04,
  author    = {Udupa, Raghavendra and Faruquie, Tanveer A. and Maji, Hemanta Kumar},
  title     = {An Algorithmic Framework for Solving the Decoding Problem in Statistical Machine Translation},
  publisher = {International Conference on Computational Linguistics},
  year      = {2004},
}
@inproceedings{DBLP:conf/naacl/RiedelC09,
  author    = {Riedel, Sebastian and Clarke, James},
  title     = {Revisiting Optimal Decoding for Machine Translation {IBM} Model 4},
  year      = {2009},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
}
@inproceedings{DBLP:conf/eacl/UdupaM06,
  author    = {Udupa, Raghavendra and Maji, Hemanta Kumar},
  title     = {Computational Complexity of Statistical Machine Translation},
  year      = {2006},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
}
@inproceedings{DBLP:conf/emnlp/LeuschMN08,
  author    = {Leusch, Gregor and
               Matusov, Evgeny and
               Ney, Hermann},
  title     = {Complexity of Finding the {BLEU}-optimal Hypothesis in a Confusion Network},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {839--847},
  year      = {2008},
}
@article{DBLP:journals/mt/FlemingKN15,
  author  = {Fleming, Noah and Kolokolova, Antonina and Nizamee, Renesa},
  title   = {Complexity of alignment and decoding problems: restrictions and approximations},
  journal = {Machine Translation},
  year    = {2015},
  volume  = {29},
  number  = {3-4},
  pages   = {163--187},
}
%%%%% chapter 5------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% chapter 6------------------------------------------------------

@inproceedings{ittycheriah2005maximum,
  author    = {Ittycheriah, Abraham and Roukos, Salim},
  title     = {A Maximum Entropy Word Aligner for Arabic-English Machine Translation},
  year      = {2005},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
}
@inproceedings{koehn2003statistical,
  author    = {Koehn, Philipp and Och, Franz Josef and Marcu, Daniel},
  title     = {Statistical Phrase-Based Translation},
  year      = {2003},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
}
@book{manning1999foundations,
  author    = {Manning, Christopher D. and Sch{\"u}tze, Hinrich},
  title     = {Foundations of Statistical Natural Language Processing},
  publisher = {Massachusetts Institute of Technology Press},
  year      = {1999},
}
@article{och2003systematic,
  author  = {Och, Franz Josef and Ney, Hermann},
  title   = {A Systematic Comparison of Various Statistical Alignment Models},
  journal = {Computational Linguistics},
  year    = {2003},
  volume  = {29},
  number  = {1},
  pages   = {19--51},
}
@article{och2004alignment,
  author  = {Och, Franz Josef and Ney, Hermann},
  title   = {The Alignment Template Approach to Statistical Machine Translation},
  journal = {Computational Linguistics},
  year    = {2004},
  volume  = {30},
  number  = {4},
  pages   = {417--449},
}
@inproceedings{vogel1996hmm,
  author    = {Vogel, Stephan and
               Ney, Hermann and
               Tillmann, Christoph},
  title     = {{HMM}-Based Word Alignment in Statistical Translation},
  publisher = {International Conference on Computational Linguistics},
  pages     = {836--841},
  year      = {1996},
}
@article{xiao2013unsupervised,
  author  = {Xiao, Tong and Zhu, Jingbo},
  title   = {Unsupervised sub-tree alignment for tree-to-tree translation},
  journal = {Journal of Artificial Intelligence Research},
  volume  = {48},
  pages   = {733--782},
  year    = {2013},
}
% Journal article: the periodical goes in the journal field, and the author
% is in "Last, First" form so "Brown" is parsed as the surname.
@article{1966Decentering,
  author  = {Brown, Duane C.},
  title   = {Decentering Distortion of Lenses},
  journal = {Photogrammetric Engineering},
  volume  = {32},
  number  = {3},
  pages   = {444--462},
  year    = {1966},
}
@inproceedings{ClausF05,
  author    = {Claus, David and Fitzgibbon, Andrew W.},
  title     = {A Rational Function Lens Distortion Model for General Cameras},
  publisher = {{IEEE} Computer Society Conference on Computer Vision and Pattern Recognition},
  pages     = {213--219},
  year      = {2005},
}
@inproceedings{ChiangLMMRS05,
  author    = {Chiang, David and Lopez, Adam and Madnani, Nitin and
               Monz, Christof and Resnik, Philip and Subotin, Michael},
  title     = {The Hiero Machine Translation System: Extensions, Evaluation, and Analysis},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {779--786},
  year      = {2005},
}
%%%%% chapter 6------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% chapter 7------------------------------------------------------
@article{DBLP:journals/tit/Viterbi67,
  author  = {Viterbi, Andrew J.},
  title   = {Error bounds for convolutional codes and an asymptotically optimum decoding algorithm},
  journal = {IEEE Transactions on Information Theory},
  year    = {1967},
  volume  = {13},
  number  = {2},
  pages   = {260--269},
}
@inproceedings{DBLP:conf/acl/OchN02,
  author    = {Och, Franz Josef and Ney, Hermann},
  title     = {Discriminative Training and Maximum Entropy Models for Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {295--302},
  year      = {2002},
}
@inproceedings{koehn2000estimating,
  author    = {Koehn, Philipp and Knight, Kevin},
  title     = {Estimating Word Translation Probabilities from Unrelated Monolingual Corpora Using the {EM} Algorithm},
  publisher = {AAAI Press},
  pages     = {711--715},
  year      = {2000},
}
@inproceedings{taskar2005a,
  author    = {Taskar, Benjamin and Lacoste-Julien, Simon and Klein, Dan},
  title     = {A Discriminative Matching Approach to Word Alignment},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {73--80},
  year      = {2005},
}
@inproceedings{DBLP:conf/coling/OchN00,
  author    = {Och, Franz Josef and Ney, Hermann},
  title     = {A Comparison of Alignment Models for Statistical Machine Translation},
  publisher = {Morgan Kaufmann},
  pages     = {1086--1090},
  year      = {2000},
}
@inproceedings{koehn2002learning,
  author    = {Koehn, Philipp and
               Knight, Kevin},
  title     = {Learning a Translation Lexicon from Monolingual Corpora},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {9--16},
  year      = {2002},
}
@inproceedings{Gros2008MSD,
  author    = {Gros, Jerneja {\v{Z}}ganec},
  title     = {MSD Recombination Method in Statistical Machine Translation},
  publisher = {American Institute of Physics},
  volume    = {1060},
  pages     = {186--189},
  year      = {2008},
}
@inproceedings{xiong2006maximum,
  author    = {Xiong, Deyi and Liu, Qun and Lin, Shouxun},
  title     = {Maximum Entropy Based Phrase Reordering Model for Statistical Machine Translation},
  year      = {2006},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
}
@inproceedings{DBLP:conf/naacl/KumarB05,
  author    = {Kumar, Shankar and Byrne, William J.},
  title     = {Local Phrase Reordering Models for Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {161--168},
  year      = {2005},
}
@inproceedings{li-etal-2014-neural,
  author    = {Li, Peng and Liu, Yang and Sun, Maosong and
               Izuha, Tatsuya and Zhang, Dakun},
  title     = {A Neural Reordering Model for Phrase-based Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {1897--1907},
  year      = {2014},
}

@article{powell1964an,
  author  = {Powell, M. J. D.},
  title   = {An efficient method for finding the minimum of a function of several variables without calculating derivatives},
  journal = {The Computer Journal},
  year    = {1964},
  volume  = {7},
  number  = {2},
  pages   = {155--162},
}
@inproceedings{DBLP:conf/emnlp/ChiangMR08,
  author    = {Chiang, David and Marton, Yuval and Resnik, Philip},
  title     = {Online Large-Margin Training of Syntactic and Structural Translation Features},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {224--233},
  year      = {2008},
}
@inproceedings{Hopkins2011Tuning,
  author    = {Hopkins, Mark and May, Jonathan},
  title     = {Tuning as Ranking},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {1352--1362},
  year      = {2011},
}
@inproceedings{DBLP:conf/acl/KleinM03,
  author    = {Klein, Dan and Manning, Christopher D.},
  title     = {Accurate Unlexicalized Parsing},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {423--430},
  year      = {2003},
}
@inproceedings{DBLP:conf/acl/OchW98,
  author    = {Och, Franz Josef and Weber, Hans},
  title     = {Improving Statistical Natural Language Translation with Categories and Rules},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {985--989},
  year      = {1998},
}
@phdthesis{DBLP:phd/dnb/Och02,
  author    = {Och, Franz Josef},
  title     = {Statistical machine translation: from single word models to alignment templates},
  year      = {2002},
  publisher = {{RWTH} Aachen University, Germany},
}
@inproceedings{DBLP:conf/acl/WangW98,
  author    = {Wang, Ye-Yi and Waibel, Alex},
  title     = {Modeling with Structures in Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {1357--1363},
  year      = {1998},
}
@inproceedings{DBLP:conf/acl/WatanabeSO03,
  author    = {Watanabe, Taro and Sumita, Eiichiro and Okuno, Hiroshi G.},
  title     = {Chunk-Based Statistical Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {303--310},
  year      = {2003},
}
@inproceedings{DBLP:conf/acl/Marcu01,
  author    = {Marcu, Daniel},
  title     = {Towards a Unified Approach to Memory- and Statistical-Based Machine Translation},
  publisher = {Morgan Kaufmann Publishers},
  pages     = {378--385},
  year      = {2001},
}
@inproceedings{DBLP:conf/ki/ZensON02,
  author    = {Zens, Richard and Och, Franz Josef and Ney, Hermann},
  title     = {Phrase-Based Statistical Machine Translation},
  publisher = {Springer},
  volume    = {2479},
  pages     = {18--32},
  year      = {2002},
}
@inproceedings{DBLP:conf/naacl/ZensN04,
  author    = {Zens, Richard and Ney, Hermann},
  title     = {Improvements in Phrase-Based Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {257--264},
  year      = {2004},
}
@inproceedings{DBLP:conf/emnlp/MarcuW02,
  author    = {Marcu, Daniel and Wong, Daniel},
  title     = {A Phrase-Based, Joint Probability Model for Statistical Machine Translation},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2002},
  pages     = {133--139},
}
@inproceedings{DBLP:conf/wmt/DeNeroGZK06,
  author    = {DeNero, John and Gillick, Dan and Zhang, James and Klein, Dan},
  title     = {Why Generative Phrase Models Underperform Surface Heuristics},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {31--38},
  year      = {2006},
}
@inproceedings{german2011bilingual,
  author    = {Sanchis-Trilles, German and Ortiz-Martinez, Daniel and
               Gonzalez-Rubio, Jesus and Gonzalez, Jorge and
               Casacuberta, Francisco},
  title     = {Bilingual segmentation for phrasetable pruning in Statistical Machine Translation},
  publisher = {Conference of the European Association for Machine Translation},
  pages     = {257--264},
  year      = {2011},
}
@inproceedings{DBLP:conf/coling/BlackwoodGB08,
  author    = {Blackwood, Graeme W. and de Gispert, Adri{\`{a}} and Byrne, William},
  title     = {Phrasal Segmentation Models for Statistical Machine Translation},
  publisher = {International Conference on Computational Linguistics},
  year      = {2008},
  pages     = {19--22},
}
@inproceedings{DBLP:conf/naacl/XiongZL10,
  author    = {Xiong, Deyi and Zhang, Min and Li, Haizhou},
  title     = {Learning Translation Boundaries for Phrase-Based Decoding},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {136--144},
  year      = {2010},
}
@inproceedings{DBLP:conf/naacl/Tillman04,
  author    = {Tillman, Christoph},
  title     = {A Unigram Orientation Model for Statistical Machine Translation},
  year      = {2004},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
}
@inproceedings{DBLP:conf/acl/NagataSYO06,
  author    = {Nagata, Masaaki and Saito, Kuniko and
               Yamamoto, Kazuhide and Ohashi, Kazuteru},
  title     = {A Clustered Global Phrase Reordering Model for Statistical Machine Translation},
  year      = {2006},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
}
@inproceedings{DBLP:conf/wmt/ZensN06,
  author    = {Zens, Richard and Ney, Hermann},
  title     = {Discriminative Reordering Models for Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {55--63},
  year      = {2006},
}
@inproceedings{DBLP:conf/naacl/GreenGM10,
  author    = {Green, Spence and Galley, Michel and Manning, Christopher D.},
  title     = {Improved Models of Distortion Cost for Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {867--875},
  year      = {2010},
}
@inproceedings{DBLP:conf/naacl/Cherry13,
  author    = {Cherry, Colin},
  title     = {Improved Reordering for Phrase-Based Translation using Sparse Features},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {22--31},
  year      = {2013},
}
@inproceedings{DBLP:conf/wmt/HuckWRN13,
  author    = {Huck, Matthias and Wuebker, Joern and Rietig, Felix and Ney, Hermann},
  title     = {A Phrase Orientation Model for Hierarchical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {452--463},
  year      = {2013},
}

% Venue uses the same EAMT conference string as german2011bilingual.
@inproceedings{matthias2012discriminative,
  author    = {Huck, Matthias and
               Peitz, Stephan and
               Freitag, Markus and
               Ney, Hermann},
  title     = {Discriminative Reordering Extensions for Hierarchical Phrase-Based Machine Translation},
  publisher = {Conference of the European Association for Machine Translation},
  year      = {2012},
}
@inproceedings{vinh2009improving,
  author    = {Nguyen, Vinh Van and Shimazu, Akira and
               Nguyen, Minh Le and Nguyen, Thai Phuong},
  title     = {Improving a Lexicalized Hierarchical Reordering Model Using Maximum Entropy},
  year      = {2009},
  publisher = {Machine Translation Summit XII},
}
@article{DBLP:journals/coling/BisazzaF16,
  author  = {Bisazza, Arianna and Federico, Marcello},
  title   = {A Survey of Word Reordering in Statistical Machine Translation: Computational Models and Language Phenomena},
  journal = {Computational Linguistics},
  year    = {2016},
  volume  = {42},
  number  = {2},
  pages   = {163--205},
}
@inproceedings{DBLP:conf/coling/XiaM04,
  author    = {Xia, Fei and McCord, Michael C.},
  title     = {Improving a Statistical {MT} System with Automatically Learned Rewrite Patterns},
  publisher = {International Conference on Computational Linguistics},
  year      = {2004},
}
@inproceedings{DBLP:conf/acl/CollinsKK05,
  author    = {Collins, Michael and Koehn, Philipp and Kucerova, Ivona},
  title     = {Clause Restructuring for Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {531--540},
  year      = {2005},
}
@inproceedings{DBLP:conf/emnlp/WangCK07,
  author    = {Wang, Chao and Collins, Michael and Koehn, Philipp},
  title     = {Chinese Syntactic Reordering for Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {737--745},
  year      = {2007},
}
@inproceedings{DBLP:conf/ijcnlp/WuSDTN11,
  author    = {Wu, Xianchao and Sudoh, Katsuhito and Duh, Kevin and
               Tsukada, Hajime and Nagata, Masaaki},
  title     = {Extracting Pre-ordering Rules from Predicate-Argument Structures},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {29--37},
  year      = {2011},
}
@inproceedings{DBLP:conf/coling/TillmannN00,
  author    = {Tillmann, Christoph and Ney, Hermann},
  title     = {Word Re-ordering and DP-based Search in Statistical Machine Translation},
  publisher = {Morgan Kaufmann},
  pages     = {850--856},
  year      = {2000},
}
@inproceedings{DBLP:conf/iwslt/ShenDA06a,
  author    = {Wade Shen and
               Brian Delaney and
               Timothy R. Anderson},
  title     = {An efficient graph search decoder for phrase-based statistical machine
               translation},
  pages     = {197--204},
  publisher = {International Workshop on Spoken Language Translation},
  year      = {2006}
}

@inproceedings{robert2007faster,
  author    = {Moore, Robert C. and Quirk, Chris},
  title     = {Faster Beam-Search Decoding for Phrasal Statistical Machine Translation},
  publisher = {Machine Translation Summit XI},
  year      = {2007},
}
@inproceedings{DBLP:conf/acl/HeafieldKM14,
  author    = {Heafield, Kenneth and Kayser, Michael and Manning, Christopher D.},
  title     = {Faster Phrase-Based Decoding by Refining Feature State},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {130--135},
  year      = {2014},
}
@inproceedings{DBLP:conf/acl/WuebkerNZ12,
  author    = {Wuebker, Joern and Ney, Hermann and Zens, Richard},
  title     = {Fast and Scalable Decoding with Language Model Look-Ahead for Phrase-based Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {28--32},
  year      = {2012},
}
@inproceedings{DBLP:conf/iwslt/ZensN08,
  author    = {Richard Zens and
               Hermann Ney},
  title     = {Improvements in dynamic programming beam search for phrase-based statistical
               machine translation},
  pages     = {198--205},
  publisher = {International Workshop on Spoken Language Translation},
  year      = {2008}
}
@inproceedings{och2004smorgasbord,
  author    = {Och, Franz Josef and Gildea, Daniel and Khudanpur, Sanjeev and
               Sarkar, Anoop and Yamada, Kenji and Fraser, Alexander M. and
               Kumar, Shankar and Shen, Libin and Smith, David and
               Eng, Katherine and Jain, Viren and Jin, Zhen and
               Radev, Dragomir R.},
  title     = {A Smorgasbord of Features for Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {161--168},
  year      = {2004},
}
@inproceedings{Chiang200911,
  author    = {Chiang, David and Knight, Kevin and Wang, Wei},
  title     = {11,001 New Features for Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {218--226},
  year      = {2009},
}
@inproceedings{gildea2003loosely,
  author    = {Gildea, Daniel},
  title     = {Loosely Tree-Based Alignment for Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {80--87},
  year      = {2003},
}
@inproceedings{Blunsom2008A,
  author    = {Blunsom, Phil and Cohn, Trevor and Osborne, Miles},
  title     = {A Discriminative Latent Variable Model for Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {200--208},
  year      = {2008},
}
@inproceedings{Blunsom2009A,
  author    = {Blunsom, Phil and Cohn, Trevor and Dyer, Chris and Osborne, Miles},
  title     = {A Gibbs Sampler for Phrasal Synchronous Grammar Induction},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {782--790},
  year      = {2009},
}
@inproceedings{Cohn2009A,
  author    = {Cohn, Trevor and Blunsom, Phil},
  title     = {A Bayesian Model of Syntax-Directed Tree to String Grammar Induction},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {352--361},
  year      = {2009},
}
@inproceedings{smith2006minimum,
  author    = {Smith, David A. and Eisner, Jason},
  title     = {Minimum Risk Annealing for Training Log-Linear Models},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2006},
}
@inproceedings{li2009first,
  author    = {Li, Zhifei and Eisner, Jason},
  title     = {First- and Second-Order Expectation Semirings with Applications to Minimum-Risk Training on Translation Forests},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {40--51},
  year      = {2009},
}
@inproceedings{watanabe2007online,
  author    = {Watanabe, Taro and Suzuki, Jun and Tsukada, Hajime and Isozaki, Hideki},
  title     = {Online Large-Margin Training for Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {764--773},
  year      = {2007},
}
@inproceedings{dreyer2015apro,
  author    = {Dreyer, Markus and Dong, Yuanzhe},
  title     = {{APRO:} All-Pairs Ranking Optimization for {MT} Tuning},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {1018--1023},
  year      = {2015},
}
@article{XiaoA,
  author    = {Xiao, Tong and Wong, Derek F. and Zhu, Jingbo},
  title     = {A Loss-Augmented Approach to Training Syntactic Machine Translation Systems},
  journal   = {IEEE Transactions on Audio, Speech, and Language Processing},
  volume    = {24},
  number    = {11},
  pages     = {2069--2083},
  year      = {2016},
}

@phdthesis{marcu2006practical,
  title={Practical structured learning techniques for natural language processing},
  author={Daum{\'{e}} III, Harold Charles},
  school={University of Southern California},
  year={2006},
}
@inproceedings{DBLP:conf/emnlp/SchwenkCF07,
  author    = {Schwenk, Holger and Costa-juss{\`{a}}, Marta R. and Fonollosa, Jos{\'{e}} A. R.},
  title     = {Smooth Bilingual N-Gram Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {430--438},
  year      = {2007},
}
@inproceedings{boxing2011unpacking,
  author    = {Chen, Boxing and Kuhn, Roland and Foster, George and Johnson, Howard},
  title     = {Unpacking and Transforming Feature Functions: New Ways to Smooth Phrase Tables},
  publisher = {Machine Translation Summit},
  year      = {2011},
}
@inproceedings{DBLP:conf/coling/DuanSZ10,
  author    = {Duan, Nan and Sun, Hong and Zhou, Ming},
  title     = {Translation Model Generalization using Probability Averaging for Machine Translation},
  publisher = {International Conference on Computational Linguistics},
  year      = {2010},
}
@inproceedings{DBLP:conf/naacl/QuirkM06,
  author    = {Quirk, Christopher and Menezes, Arul},
  title     = {Do we need phrases? Challenging the conventional wisdom in Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2006},
}
@article{DBLP:journals/coling/MarinoBCGLFC06,
  author    = {Mari{\~{n}}o, Jos{\'{e}} B. and Banchs, Rafael E. and
               Crego, Josep Maria and de Gispert, Adri{\`{a}} and
               Lambert, Patrik and Fonollosa, Jos{\'{e}} A. R. and
               Costa-juss{\`{a}}, Marta R.},
  title     = {\emph{N}-gram-based Machine Translation},
  journal   = {Computational Linguistics},
  volume    = {32},
  number    = {4},
  pages     = {527--549},
  year      = {2006},
}
@inproceedings{DBLP:conf/emnlp/ZensSX12,
  author    = {Zens, Richard and Stanton, Daisy and Xu, Peng},
  title     = {A Systematic Comparison of Phrase Table Pruning Techniques},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {972--983},
  year      = {2012},
}
@inproceedings{DBLP:conf/emnlp/JohnsonMFK07,
  author    = {Johnson, Howard and Martin, Joel D. and Foster, George F. and Kuhn, Roland},
  title     = {Improving Translation Quality by Discarding Most of the Phrasetable},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {967--975},
  year      = {2007},
}
@inproceedings{DBLP:conf/emnlp/LingGTB12,
  author    = {Ling, Wang and Gra{\c{c}}a, Jo{\~{a}}o and Trancoso, Isabel and Black, Alan W.},
  title     = {Entropy-based Pruning for Phrase-based Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {962--971},
  year      = {2012},
}
@inproceedings{DBLP:conf/naacl/ZettlemoyerM07,
  author    = {Zettlemoyer, Luke S. and Moore, Robert C.},
  title     = {Selective Phrase Pair Extraction for Improved Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {209--212},
  year      = {2007},
}
@inproceedings{DBLP:conf/naacl/EckVW07,
  author    = {Eck, Matthias and Vogel, Stephan and Waibel, Alex},
  title     = {Translation Model Pruning via Usage Statistics for Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {21--24},
  year      = {2007},
}
@inproceedings{DBLP:conf/acl/Callison-BurchBS05,
  author    = {Callison-Burch, Chris and Bannard, Colin J. and Schroeder, Josh},
  title     = {Scaling Phrase-Based Statistical Machine Translation to Larger Corpora and Longer Phrases},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {255--262},
  year      = {2005},
}

@inproceedings{mcnamee2006translation,
  author    = {Paul McNamee and James Mayfield},
  title     = {Translation of Multiword Expressions Using Parallel Suffix Arrays},
  publisher = {Association for Machine Translation in the Americas},
  year      = {2006}
}

@inproceedings{DBLP:conf/naacl/ZensN07,
  author    = {Zens, Richard and Ney, Hermann},
  title     = {Efficient Phrase-Table Representation for Machine Translation with Applications to Online {MT} and Speech Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {492--499},
  year      = {2007},
}

@inproceedings{2014Dynamic,
  author    = {Germann, Ulrich},
  title     = {Dynamic Phrase Tables for Machine Translation in an Interactive Post-editing Scenario},
  publisher = {Association for Machine Translation in the Americas},
  year      = {2014},
}
%%%%% chapter 7------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% chapter 8------------------------------------------------------
@article{Chiang2012Hope,
  author    = {Chiang, David},
  title     = {Hope and Fear for Discriminative Training of Statistical Translation Models},
  journal   = {Journal of Machine Learning Research},
  volume    = {13},
  pages     = {1159--1187},
  year      = {2012},
}
@article{chiang2007hierarchical,
  author    = {Chiang, David},
  title     = {Hierarchical Phrase-Based Translation},
  journal   = {Computational Linguistics},
  volume    = {33},
  number    = {2},
  pages     = {201--228},
  year      = {2007},
}
@book{cocke1969programming,
  title ={Programming Languages and Their Compilers: Preliminary Notes},
  author ={Cocke, John and Schwartz, Jacob T.},
  year ={1970},
  publisher ={Courant Institute of Mathematical Sciences, New York University}
}
@article{younger1967recognition,
  author    = {Daniel H. Younger},
  title     = {Recognition and Parsing of Context-Free Languages in Time {$n^3$}},
  journal   = {Information and Control},
  volume    = {10},
  number    = {2},
  pages     = {189--208},
  year      = {1967}
}
@techreport{kasami1966efficient,
  author ={Tadao Kasami},
  title ={An efficient recognition and syntax-analysis algorithm for context-free languages},
  institution ={Coordinated Science Laboratory, University of Illinois at Urbana-Champaign},
  number ={R-257},
  year ={1966}
}
@inproceedings{huang2005better,
  author    = {Huang, Liang and Chiang, David},
  title     = {Better k-best Parsing},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {53--64},
  year      = {2005},
}
@article{wu1997stochastic,
  author    = {Wu, Dekai},
  title     = {Stochastic Inversion Transduction Grammars and Bilingual Parsing of Parallel Corpora},
  journal   = {Computational Linguistics},
  volume    = {23},
  number    = {3},
  pages     = {377--403},
  year      = {1997},
}
@inproceedings{huang2006statistical,
  author    = {Huang, Liang and Knight, Kevin and Joshi, Aravind},
  title     = {Statistical syntax-directed translation with extended domain of locality},
  publisher = {Computationally Hard Problems \& Joint Inference in Speech \& Language Processing},
  pages     = {66--73},
  year      = {2006},
}
@inproceedings{galley2004s,
  title ={What’s in a translation rule?},
  author ={Michel Galley and Mark Hopkins and Kevin Knight and Daniel Marcu},
  publisher={Proceedings of the Human Language Technology Conference of the North American Chapter of the Association for Computational Linguistics},
  pages ={273--280},
  year ={2004}
}
@inproceedings{eisner2003learning,
  author    = {Eisner, Jason},
  title     = {Learning Non-Isomorphic Tree Mappings for Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {205--208},
  year      = {2003},
}
@inproceedings{zhang2008tree,
  author    = {Zhang, Min and Jiang, Hongfei and Aw, AiTi and
               Li, Haizhou and Tan, Chew Lim and Li, Sheng},
  title     = {A Tree Sequence Alignment-based Tree-to-Tree Translation Model},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {559--567},
  year      = {2008},
}
@inproceedings{liu2009improving,
  author    = {Liu, Yang and L{\"{u}}, Yajuan and Liu, Qun},
  title     = {Improving Tree-to-Tree Translation with Packed Forests},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {558--566},
  year      = {2009},
}
@inproceedings{chiang2010learning,
  author    = {Chiang, David},
  title     = {Learning to Translate with Source and Target Syntax},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {1443--1452},
  year      = {2010},
}
@inproceedings{marcu2006spmt,
  author    = {Marcu, Daniel and Wang, Wei and Echihabi, Abdessamad and Knight, Kevin},
  title     = {{SPMT:} Statistical Machine Translation with Syntactified Target Language Phrases},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {44--52},
  year      = {2006},
}
@article{xue2005building,
  title={Building a large annotated Chinese corpus: the Penn Chinese treebank},
  author={Xue, Nianwen and Xia, Fei and Chiou, Fu-Dong and Palmer, Martha},
  journal={Natural Language Engineering},
  volume={11},
  number={2},
  pages={207--238},
  year={2005}
}
@article{DBLP:journals/coling/MarcusSM94,
  author    = {Marcus, Mitchell P. and Santorini, Beatrice and Marcinkiewicz, Mary Ann},
  title     = {Building a Large Annotated Corpus of English: The Penn Treebank},
  journal   = {Computational Linguistics},
  volume    = {19},
  number    = {2},
  pages     = {313--330},
  year      = {1993},
}
@inproceedings{DBLP:conf/naacl/ZhangHGK06,
  author    = {Zhang, Hao and Huang, Liang and Gildea, Daniel and Knight, Kevin},
  title     = {Synchronous Binarization for Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2006},
}
@inproceedings{Tong2009Better,
  author    = {Xiao, Tong and Li, Mu and Zhang, Dongdong and
               Zhu, Jingbo and Zhou, Ming},
  title     = {Better Synchronous Binarization for Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {362--370},
  year      = {2009},
}
@inproceedings{charniak2006multilevel,
  author    = {{Charniak}, Eugene and {Johnson}, Mark and {Elsner}, Micha and
               {Austerweil}, Joseph and {Ellis}, David and {Haxton}, Isaac and
               {Hill}, Catherine and {Shrivaths}, R. and {Moore}, Jeremy and
               {Pozar}, Michael and {Vu}, Theresa},
  title     = {Multilevel Coarse-to-Fine PCFG Parsing},
  publisher = {Proceedings of the Human Language Technology Conference of the North American Chapter of the Association for Computational Linguistics},
  pages     = {168--175},
  year      = {2006},
}
@inproceedings{DBLP:conf/coling/GrovesHW04,
  author    = {Groves, Declan and Hearne, Mary and Way, Andy},
  title     = {Robust Sub-Sentential Alignment of Phrase-Structure Trees},
  publisher = {International Conference on Computational Linguistics},
  year      = {2004},
}
@inproceedings{DBLP:conf/coling/SunZT10,
  author    = {Sun, Jun and Zhang, Min and Tan, Chew Lim},
  title     = {Discriminative Induction of Sub-Tree Alignment using Limited Labeled Data},
  publisher = {International Conference on Computational Linguistics},
  pages     = {1047--1055},
  year      = {2010},
}
@inproceedings{liu2009weighted,
  author    = {Liu, Yang and Xia, Tian and Xiao, Xinyan and Liu, Qun},
  title     = {Weighted Alignment Matrices for Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {1017--1026},
  year      = {2009},
}
@inproceedings{sun2010exploring,
  author    = {Sun, Jun and Zhang, Min and Tan, Chew Lim},
  title     = {Exploring Syntactic Structural Features for Sub-Tree Alignment Using Bilingual Tree Kernels},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {306--315},
  year      = {2010},
}
@article{ilprints729,
  author    = {Klein, Dan and Manning, Christopher D.},
  title     = {Parsing and Hypergraphs},
  journal   = {New Developments in Parsing Technology},
  volume    = {65},
  number    = {3},
  pages     = {123--134},
  year      = {2001},
}
@article{goodman1999semiring,
  author    = {Goodman, Joshua},
  title     = {Semiring Parsing},
  journal   = {Computational Linguistics},
  volume    = {25},
  number    = {4},
  pages     = {573--605},
  year      = {1999},
}
@inproceedings{eisner2002parameter,
  author    = {Eisner, Jason},
  title     = {Parameter Estimation for Probabilistic Finite-State Transducers},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {1--8},
  year      = {2002},
}
@inproceedings{zhu2011improving,
  author    = {Zhu, Jingbo and Xiao, Tong},
  title     = {Improving Decoding Generalization for Tree-to-String Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {418--423},
  year      = {2011},
}
@inproceedings{DBLP:conf/acl/AlshawiBX97,
  author    = {Alshawi, Hiyan and Buchsbaum, Adam L. and Xia, Fei},
  title     = {A Comparison of Head Transducers and Transfer for a Limited Domain Translation Application},
  publisher = {Morgan Kaufmann Publishers},
  pages     = {360--365},
  year      = {1997},
}
@inproceedings{DBLP:conf/acl-vlc/Wu95,
  author    = {Dekai Wu},
  title     = {Trainable Coarse Bilingual Grammars for Parallel Text Bracketing},
  publisher = {Third Workshop on Very Large Corpora},
  year      = {1995}
}
@inproceedings{DBLP:conf/acl/WuW98,
  author    = {Wu, Dekai and Wong, Hongsing},
  title     = {Machine Translation with a Stochastic Grammatical Channel},
  publisher = {Morgan Kaufmann Publishers},
  pages     = {1408--1415},
  year      = {1998},
}
@inproceedings{ja2006obtaining,
  author    = {J. A. S{\'{a}}nchez and J. M. Bened{\'{\i}}},
  title     = {Obtaining Word Phrases with Stochastic Inversion Transduction Grammars for Phrase-based Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2006}
}
@inproceedings{DBLP:conf/acl/ZhangQMG08,
  author    = {Zhang, Hao and Quirk, Chris and Moore, Robert C. and Gildea, Daniel},
  title     = {Bayesian Learning of Non-Compositional Phrases with Synchronous Parsing},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2008},
}
@inproceedings{DBLP:conf/coling/ZollmannVOP08,
  author    = {Zollmann, Andreas and Venugopal, Ashish and
               Och, Franz Josef and Ponte, Jay M.},
  title     = {A Systematic Comparison of Phrase-Based, Hierarchical and Syntax-Augmented Statistical {MT}},
  publisher = {International Conference on Computational Linguistics},
  pages     = {1145--1152},
  year      = {2008},
}
@inproceedings{DBLP:conf/acl/WatanabeTI06,
  author    = {Taro Watanabe and
               Hajime Tsukada and
               Hideki Isozaki},
  title     = {Left-to-Right Target Generation for Hierarchical Phrase-Based Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2006}
}
@inproceedings{DBLP:conf/naacl/GalleyHKM04,
  author    = {Galley, Michel and Hopkins, Mark and Knight, Kevin and Marcu, Daniel},
  title     = {What's in a translation rule?},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {273--280},
  year      = {2004},
}
@inproceedings{DBLP:conf/naacl/HuangK06,
  author    = {Huang, Bryant and Knight, Kevin},
  title     = {Relabeling Syntax Trees to Improve Syntax-Based Machine Translation Quality},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2006},
}
@inproceedings{DBLP:conf/emnlp/DeNeefeKWM07,
  author    = {Steve DeNeefe and
               Kevin Knight and
               Wei Wang and
               Daniel Marcu},
  title     = {What Can Syntax-Based {MT} Learn from Phrase-Based {MT}?},
  pages     = {755--763},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2007}
}
@inproceedings{DBLP:conf/wmt/LiuG08,
  author    = {Liu, Ding and Gildea, Daniel},
  title     = {Improved Tree-to-String Transducer for Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {62--69},
  year      = {2008},
}
@inproceedings{Nesson06inductionof,
  author    = {Nesson, Rebecca and Shieber, Stuart M. and Rush, Alexander},
  title     = {Induction of probabilistic synchronous tree-insertion grammars for machine translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2006},
}
@inproceedings{Zhang07atree-to-tree,
  author    = {Min Zhang and Hongfei Jiang and Ai Ti Aw and Jun Sun and Sheng Li and Chew Lim Tan},
  title     = {A Tree-to-Tree Alignment-based Model for Statistical Machine Translation},
  publisher = {Machine Translation Summit},
  year      = {2007}
}
@inproceedings{DBLP:conf/emnlp/WangKM07,
  author    = {Wang, Wei and Knight, Kevin and Marcu, Daniel},
  title     = {Binarizing Syntax Trees to Improve Syntax-Based Machine Translation Accuracy},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {746--754},
  year      = {2007},
}
@inproceedings{DBLP:conf/acl/MiHL08,
  author    = {Mi, Haitao and Huang, Liang and Liu, Qun},
  title     = {Forest-Based Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {192--199},
  year      = {2008},
}
@inproceedings{DBLP:conf/emnlp/MiH08,
  author    = {Mi, Haitao and Huang, Liang},
  title     = {Forest-based Translation Rule Extraction},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {206--214},
  year      = {2008},
}
@inproceedings{DBLP:conf/emnlp/ZhangZZ11,
  author    = {Zhang, Jiajun and Zhai, Feifei and Zong, Chengqing},
  title     = {Augmenting String-to-Tree Translation Models with Fuzzy Use of Source-side Syntax},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {204--215},
  year      = {2011},
}
@inproceedings{DBLP:conf/wmt/ZollmannV06,
  author    = {Zollmann, Andreas and Venugopal, Ashish},
  title     = {Syntax Augmented Machine Translation via Chart Parsing},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {138--141},
  year      = {2006},
}
@inproceedings{DBLP:conf/acl/MartonR08,
  author    = {Marton, Yuval and Resnik, Philip},
  title     = {Soft Syntactic Constraints for Hierarchical Phrased-Based Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {1003--1011},
  year      = {2008},
}
@inproceedings{DBLP:conf/wmt/PopelMGZ11,
  author    = {Popel, Martin and Marecek, David and Green, Nathan and Zabokrtsk{\'{y}}, Zdenek},
  title     = {Influence of Parser Choice on Dependency-Based {MT}},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {433--439},
  year      = {2011},
}
@inproceedings{DBLP:conf/coling/XiaoZZZ10,
  author    = {Xiao, Tong and Zhu, Jingbo and Zhang, Hao and Zhu, Muhua},
  title     = {An Empirical Study of Translation Rule Extraction with Multiple Parsers},
  publisher = {Chinese Information Processing Society of China},
  pages     = {1345--1353},
  year      = {2010},
}
@inproceedings{Zhai2012Treebased,
  title     = {Tree-based Translation without using Parse Trees},
  author    = {Feifei Zhai and
               Jiajun Zhang and
               Yu Zhou and
               Chengqing Zong},
  publisher = {International Conference on Computational Linguistics},
  year      = {2012},
}
@article{DBLP:journals/tacl/ZhaiZZZ13,
  author    = {Feifei Zhai and
               Jiajun Zhang and
               Yu Zhou and
               Chengqing Zong},
  title     = {Unsupervised Tree Induction for Tree-based Translation},
  journal   = {Transactions of the Association for Computational Linguistics},
  volume    = {1},
  pages     = {243--254},
  year      = {2013}
}

@article{DBLP:journals/mt/QuirkM06,
  author    = {Quirk, Christopher and Menezes, Arul},
  title     = {Dependency treelet translation: the convergence of statistical and example-based machine-translation?},
  journal   = {Machine Translation},
  volume    = {20},
  number    = {1},
  pages     = {43--65},
  year      = {2006},
}

@inproceedings{DBLP:conf/wmt/XiongLL07,
  author    = {Xiong, Deyi and Liu, Qun and Lin, Shouxun},
  title     = {A Dependency Treelet String Correspondence Model for Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {40--47},
  year      = {2007},
}
@inproceedings{DBLP:conf/coling/Lin04,
  author    = {Lin, Dekang},
  title     = {A Path-based Transfer Model for Machine Translation},
  publisher = {International Conference on Computational Linguistics},
  year      = {2004},
}
@inproceedings{DBLP:conf/acl/DingP05,
  author    = {Ding, Yuan and Palmer, Martha},
  title     = {Machine Translation Using Probabilistic Synchronous Dependency Insertion Grammars},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {541--548},
  year      = {2005},
}
@inproceedings{DBLP:conf/coling/ChenXMJL14,
  author    = {Hongshen Chen and
               Jun Xie and
               Fandong Meng and
               Wenbin Jiang and
               Qun Liu},
  title     = {A Dependency Edge-based Transfer Model for Statistical Machine Translation},
  pages     = {1103--1113},
  publisher = {International Conference on Computational Linguistics},
  year      = {2014}
}
@inproceedings{DBLP:conf/coling/SuLMZLL10,
  author    = {Su, Jinsong and Liu, Yang and Mi, Haitao and
               Zhao, Hongmei and Lv, Yajuan and Liu, Qun},
  title     = {Dependency-Based Bracketing Transduction Grammar for Statistical Machine Translation},
  publisher = {Chinese Information Processing Society of China},
  pages     = {1185--1193},
  year      = {2010},
}
@inproceedings{DBLP:conf/coling/XieXL14,
  author    = {Jun Xie and
               Jinan Xu and
               Qun Liu},
  title     = {Augment Dependency-to-String Translation with Fixed and Floating Structures},
  pages     = {2217--2226},
  publisher = {International Conference on Computational Linguistics},
  year      = {2014}
}
@inproceedings{DBLP:conf/emnlp/LiWL15,
  author    = {Li, Liangyou and Way, Andy and Liu, Qun},
  title     = {Dependency Graph-to-String Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {33--43},
  year      = {2015},
}
@inproceedings{DBLP:conf/acl/MiL10,
  author    = {Mi, Haitao and Liu, Qun},
  title     = {Constituency to Dependency Translation with Forests},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {1433--1442},
  year      = {2010},
}
@inproceedings{DBLP:conf/coling/TuLHLL10,
  author = {Zhaopeng Tu and Yang Liu and Young-Sook Hwang and Qun Liu and Shouxun Lin},
  title = {Dependency Forest for Statistical Machine Translation},
  publisher = {International Conference on Computational Linguistics},
  year = {2010},
  pages = {1092--1100},
}
% Three authors; BibTeX only splits names on the word and, so a comma here would merge the first two.
@inproceedings{bangalore2001computing,
  title ={Computing consensus translation from multiple machine translation systems},
  author ={Srinivas Bangalore and German Bordel and Giuseppe Riccardi},
  publisher = {IEEE Workshop on Automatic Speech Recognition and Understanding},
  pages ={351--354},
  year ={2001}
}
@inproceedings{rosti2007combining,
  author = {Antti-Veikko I. Rosti and Necip Fazil Ayan and Bing Xiang and
            Spyridon Matsoukas and Richard M. Schwartz and Bonnie J. Dorr},
  title = {Combining Outputs from Multiple Machine Translation Systems},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2007},
  pages = {228--235},
}
% Journal article: the periodical name belongs in journal, which article entries require.
@article{xiao2013bagging,
  title ={Bagging and boosting statistical machine translation systems},
  author ={Tong Xiao and Jingbo Zhu and Tongran Liu},
  journal ={Artificial Intelligence},
  volume ={195},
  pages ={496--527},
  year ={2013}
}
@inproceedings{Yang2009Lattice,
  author = {Yang Feng and Yang Liu and Haitao Mi and Qun Liu and Yajuan L{\"{u}}},
  title = {Lattice-based System Combination for Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2009},
  pages = {1105--1113},
}
@inproceedings{He2008Indirect,
  author = {Xiaodong He and Mei Yang and Jianfeng Gao and Patrick Nguyen and Robert C. Moore},
  title = {Indirect-HMM-based Hypothesis Alignment for Combining Outputs from Machine Translation Systems},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2008},
  pages = {98--107},
}
@inproceedings{Li2009Incremental,
  author = {Chi-Ho Li and Xiaodong He and Yupeng Liu and Ning Xi},
  title = {Incremental {HMM} Alignment for {MT} System Combination},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2009},
  pages = {949--957},
}
@inproceedings{Yang2009Joint,
  author = {Yang Liu and Haitao Mi and Yang Feng and Qun Liu},
  title = {Joint Decoding with Multiple Translation Models},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2009},
  pages = {576--584},
}
@inproceedings{Mu2009Collaborative,
  author = {Mu Li and Nan Duan and Dongdong Zhang and Chi-Ho Li and Ming Zhou},
  title = {Collaborative Decoding: Partial Hypothesis Re-ranking Using Translation Consensus between Decoders},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2009},
  pages = {585--592},
}
@inproceedings{Tong2016Syntactic,
  author = {Tong Xiao and Jingbo Zhu and Chunliang Zhang and Tongran Liu},
  title = {Syntactic Skeleton-Based Translation},
  publisher = {AAAI Conference on Artificial Intelligence},
  year = {2016},
  pages = {2856--2862}
}
@inproceedings{charniak2001immediate,
  title = {Immediate-Head Parsing for Language Models},
  author = {Eugene Charniak},
  year = {2001},
  publisher = {Morgan Kaufmann Publishers},
  pages = {116--123},
}
@inproceedings{shen2008a,
  author = {Libin Shen and Jinxi Xu and Ralph M. Weischedel},
  title = {A New String-to-Dependency Machine Translation Algorithm with a Target Dependency Language Model},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2008},
  pages = {577--585},
}
% Journal article: the periodical name belongs in journal, which article entries require.
@article{xiao2011language,
  title ={Language Modeling for Syntax-Based Machine Translation Using Tree Substitution Grammars: A Case Study on Chinese-English Translation},
  author ={Xiao, Tong and Zhu, Jingbo and Zhu, Muhua},
  journal ={ACM Transactions on Asian Language Information Processing (TALIP)},
  volume ={10},
  number ={4},
  pages ={1--29},
  year ={2011}
}
%%%%% chapter 8------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% chapter 9------------------------------------------------------
@article{brown1992class,
  author  = {Peter F. Brown and Vincent J. Della Pietra and Peter V. De Souza and
             Jennifer C. Lai and Robert L. Mercer},
  title   = {Class-based n-gram models of natural language},
  journal = {Computational linguistics},
  year    = {1992},
  volume  = {18},
  number  = {4},
  pages   = {467--479},
}

@inproceedings{mikolov2012context,
  author    = {Tomas Mikolov and Geoffrey Zweig},
  title     = {Context dependent recurrent neural network language model},
  year      = {2012},
  pages     = {234--239},
  publisher = {IEEE Spoken Language Technology Workshop},
}

% arXiv preprint, written in the arXiv-preprint journal form used by other entries in this file.
% NOTE(review): arXiv number supplied from memory - confirm against arxiv.org.
@article{zaremba2014recurrent,
  title={Recurrent Neural Network Regularization},
  author={Wojciech Zaremba and
               Ilya Sutskever and
               Oriol Vinyals},
  journal={arXiv preprint arXiv:1409.2329},
  year={2014}
}

% Conference paper: typed inproceedings with the venue in publisher, as the other conference entries here do.
@inproceedings{zilly2016recurrent,
  title={Recurrent Highway Networks},
  author={Julian G. Zilly and
               Rupesh Kumar Srivastava and
               Jan Koutn{\'{\i}}k and
               J{\"{u}}rgen Schmidhuber},
  publisher={International Conference on Machine Learning},
  year={2016}
}

% Conference paper: typed inproceedings with the venue in publisher, as the other conference entries here do.
% The acronym is braced so sentence-casing styles keep its capitals.
@inproceedings{merity2017regularizing,
  title={Regularizing and optimizing {LSTM} language models},
  author={Stephen Merity and
               Nitish Shirish Keskar and
               Richard Socher},
  publisher={International Conference on Learning Representations},
  year={2017}
}

@article{radford2019language,
  author  = {Radford, Alec and
             Wu, Jeffrey and
             Child, Rewon and
             Luan, David and
             Amodei, Dario and
             Sutskever, Ilya},
  title   = {Language models are unsupervised multitask learners},
  journal = {OpenAI Blog},
  year    = {2019},
  volume  = {1},
  number  = {8},
  pages   = {9},
}

@article{baydin2017automatic,
  author  = {Baydin, At{\i}l{\i}m G{\"u}nes and
             Pearlmutter, Barak A and
             Radul, Alexey Andreyevich and
             Siskind, Jeffrey Mark},
  title   = {Automatic differentiation in machine learning: a survey},
  journal = {Journal of Machine Learning Research},
  year    = {2017},
  volume  = {18},
  number  = {1},
  pages   = {5595--5637},
}

@article{qian1999momentum,
  title = {On the momentum term in gradient descent learning algorithms},
  author = {Ning Qian},
  journal = {Neural Networks},
  year = {1999},
  volume = {12},
  number = {1},
  pages = {145--151}
}

@article{duchi2011adaptive,
  title = {Adaptive Subgradient Methods for Online Learning and Stochastic Optimization},
  author = {John C. Duchi and Elad Hazan and Yoram Singer},
  journal = {Journal of Machine Learning Research},
  year = {2011},
  volume = {12},
  pages = {2121--2159}
}

@article{tieleman2012rmsprop,
  author  = {Tieleman, Tijmen and
             Hinton, Geoffrey},
  title   = {Lecture 6.5-rmsprop: Divide the gradient by a running average of its recent magnitude},
  journal = {COURSERA: Neural networks for machine learning},
  year    = {2012},
  volume  = {4},
  number  = {2},
  pages   = {26--31},
}

@inproceedings{kingma2014adam,
  title = {Adam: {A} Method for Stochastic Optimization},
  author = {Diederik P. Kingma and Jimmy Ba},
  year = {2015},
  publisher = {International Conference on Learning Representations},
}

@inproceedings{ioffe2015batch,
  title = {Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift},
  author = {Sergey Ioffe and Christian Szegedy},
  publisher = {International Conference on Machine Learning},
  year = {2015},
  volume = {37},
  pages = {448--456},
}

@article{Ba2016LayerN,
  title = {Layer Normalization},
  author = {Lei Jimmy Ba and Jamie Ryan Kiros and Geoffrey Hinton},
  year = {2016},
  journal = {CoRR},
  volume = {abs/1607.06450},
}

@inproceedings{mikolov2013distributed,
  title = {Distributed Representations of Words and Phrases and their Compositionality},
  author = {Tomas Mikolov and Ilya Sutskever and Kai Chen and Gregory S. Corrado and Jeffrey Dean},
  year = {2013},
  pages = {3111--3119},
  publisher = {Conference on Neural Information Processing Systems},
}

@inproceedings{arthur2016incorporating,
  title = {Incorporating Discrete Translation Lexicons into Neural Machine Translation},
  author = {Philip Arthur and Graham Neubig and Satoshi Nakamura},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2016},
  pages = {1557--1567},
}

@inproceedings{stahlberg2016syntactically,
  title = {Syntactically Guided Neural Machine Translation},
  author = {Felix Stahlberg and Eva Hasler and Aurelien Waite and Bill Byrne},
  year = {2016},
  publisher = {Annual Meeting of the Association for Computational Linguistics}
}

@inproceedings{plank2013embedding,
  title = {Embedding Semantic Similarity in Tree Kernels for Domain Adaptation of Relation Extraction},
  author = {Barbara Plank and Alessandro Moschitti},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2013},
  pages = {1498--1507},
}

% The camel-case system name is braced so sentence-casing styles do not lowercase its inner capital.
@inproceedings{perozzi2014deepwalk,
  author    = {Bryan Perozzi and
               Rami Al-Rfou and
               Steven Skiena},
  title     = {{DeepWalk}: online learning of social representations},
  publisher = {ACM SIGKDD Conference on Knowledge Discovery and Data Mining},
  pages     = {701--710},
  year      = {2014}
}

% Page range uses the double hyphen BibTeX expects; stray padding inside the author list removed.
@article{2011Natural,
  title={Natural Language Processing (almost) from Scratch},
  author={Collobert, Ronan and Weston, Jason and Bottou, Léon and Karlen, Michael and Kavukcuoglu, Koray and Kuksa, Pavel},
  journal={Journal of Machine Learning Research},
  volume={12},
  number={1},
  pages={2493--2537},
  year={2011}
}
@inproceedings{mccann2017learned,
  title = {Learned in Translation: Contextualized Word Vectors},
  author = {Bryan Mccann and James Bradbury and Caiming Xiong and Richard Socher},
  year = {2017},
  pages = {6294--6305},
  booktitle = {Conference on Neural Information Processing Systems},
}

%%%%%%%%%%%%%%%%%%%%%%%神经语言模型，已检查修改%%%%%%%%%%%%%%%%%%%%%%%%%
@inproceedings{Peters2018DeepCW,
  author    = {Matthew E. Peters and Mark Neumann and Mohit Iyyer and Matt Gardner and
               Christopher Clark and Kenton Lee and Luke Zettlemoyer},
  title     = {Deep contextualized word representations},
  year      = {2018},
  publisher = {Proceedings of the Human Language Technology Conference of the North American Chapter of the Association for Computational Linguistics},
}


% Workshop paper: typed inproceedings with the venue in publisher, matching the other entry for this workshop.
% Page range uses a double hyphen; the acronym is braced against sentence-casing styles.
@inproceedings{Graves2013HybridSR,
  title={Hybrid speech recognition with Deep Bidirectional {LSTM}},
  author={Alex Graves and
          Navdeep Jaitly and
          Abdel-rahman Mohamed},
  publisher={IEEE Workshop on Automatic Speech Recognition and Understanding},
  year={2013},
  pages={273--278}
}

% The acronym is braced so sentence-casing styles keep its capitals.
@inproceedings{Verwimp2017CharacterWordLL,
  title={Character-Word {LSTM} Language Models},
  author={Lyan Verwimp and
          Joris Pelemans and
          Hugo Van Hamme and
          Patrick Wambacq},
  publisher={European Association of Computational Linguistics},
  year={2017}
}

@inproceedings{Onoe2016GatedWR,
  title = {Gated Word-Character Recurrent Language Model},
  author = {Yasumasa Miyamoto and Kyunghyun Cho},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2016},
  pages = {1992--1997},
}
% Page range uses the double hyphen BibTeX expects.
@inproceedings{Hwang2017CharacterlevelLM,
  title={Character-level language modeling with hierarchical recurrent neural networks},
  author={Kyuyeon Hwang and
          Wonyong Sung},
  publisher={International Conference on Acoustics, Speech and Signal Processing},
  year={2017},
  pages={5720--5724}
}

@inproceedings{Kim2016CharacterAwareNL,
  author    = {Yoon Kim and Yacine Jernite and David Sontag and Alexander M. Rush},
  title     = {Character-Aware Neural Language Models},
  year      = {2016},
  publisher = {AAAI Conference on Artificial Intelligence},
}
@article{Ahn2016ANK,
  author  = {Sungjin Ahn and Heeyoul Choi and Tanel P{\"a}rnamaa and Yoshua Bengio},
  title   = {A Neural Knowledge Language Model},
  year    = {2016},
  journal = {arXiv preprint arXiv:1608.00318},
}
% Conference paper: typed inproceedings with the venue in publisher, as the other conference entries here do.
@inproceedings{Wang2015LargerContextLM,
  title={Larger-Context Language Modelling},
  author={Tian Wang and
          Kyunghyun Cho},
  publisher={Annual Meeting of the Association for Computational Linguistics},
  year={2015}
}
% Page range uses the double hyphen BibTeX expects.
@article{Adel2015SyntacticAS,
  title={Syntactic and Semantic Features For Code-Switching Factored Language Models},
  author={Heike Adel and
          Ngoc Vu and
          Katrin Kirchhoff and
          Dominic Telaar and
          Tanja Schultz},
  journal={IEEE/ACM Transactions on Audio, Speech, and Language Processing},
  year={2015},
  volume={23},
  pages={431--440}
}
@inproceedings{Wu2012FactoredLM,
  author    = {Youzheng Wu and Xugang Lu and Hitoshi Yamamoto and
               Shigeki Matsuda and Chiori Hori and Hideki Kashioka},
  title     = {Factored Language Model based on Recurrent Neural Network},
  year      = {2012},
  publisher = {International Conference on Computational Linguistics},
}
@inproceedings{Pham2016ConvolutionalNN,
  author    = {Ngoc-quan Pham and German Kruszewski and Gemma Boleda},
  title     = {Convolutional Neural Network Language Models},
  year      = {2016},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%深度阅读修改和补充，待检查修改%%%%%%%%%%%%%%%%%%%
% Periodical article with volume and issue, so journal names the newsletter rather than the KDD conference.
% NOTE(review): newsletter name supplied from memory - confirm against the ACM Digital Library.
@article{moraffah2020causal,
  title={Causal Interpretability for Machine Learning-Problems, Methods and Evaluation},
  author={Raha Moraffah and
          Mansooreh Karami and
          Ruocheng Guo and
          Adrienne Raglin and
          Huan Liu},
  journal={ACM SIGKDD Explorations Newsletter},
  volume={22},
  number={1},
  pages={18--33},
  year={2020}
}

% Book chapter: incollection requires booktitle, the title of the containing volume.
% NOTE(review): volume title supplied from memory - confirm against the publisher record.
@incollection{nguyen2019understanding,
  title={Understanding neural networks via feature visualization: A survey},
  author={Anh Nguyen and
          Jason Yosinski and
          Jeff Clune},
  booktitle={Explainable {AI}: Interpreting, Explaining and Visualizing Deep Learning},
  pages={55--76},
  year={2019},
  publisher={Springer}
}
@inproceedings{yang2017improving,
  author    = {Yating Yang and Xiao Li and Tonghai Jiang and Jinying Kong and
               Bo Ma and Xi Zhou and Lei Wang},
  title     = {Improving adversarial neural machine translation with prior knowledge},
  year      = {2017},
  pages     = {1373--1377},
  publisher = {IEEE Global Conference on Signal and Information Processing},
}
@inproceedings{currey2019incorporating,
  author    = {Anna Currey and Kenneth Heafield},
  title     = {Incorporating source syntax into transformer-based neural machine translation},
  year      = {2019},
  pages     = {24--33},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
}

% Conference paper: typed inproceedings with the venue in publisher, as the other conference entries here do.
@inproceedings{currey2018multi,
  title={Multi-source syntactic neural machine translation},
  author={Anna Currey and
          Kenneth Heafield},
  publisher={Conference on Empirical Methods in Natural Language Processing},
  year={2018}
}
@inproceedings{marevcek2018extracting,
  author    = {David Mare{\v{c}}ek and Rudolf Rosa},
  title     = {Extracting syntactic trees from transformer encoder self-attentions},
  year      = {2018},
  pages     = {347--349},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
}
% Conference paper: typed inproceedings with the venue in publisher, as the other conference entries here do.
% The acronym has its capitals restored and is braced against sentence-casing styles.
@inproceedings{blevins2018deep,
  title={Deep {RNNs} encode soft hierarchical syntax},
  author={Blevins, Terra and Levy, Omer and Zettlemoyer, Luke},
  publisher={Annual Meeting of the Association for Computational Linguistics},
  year={2018}
}
% The model name is braced so sentence-casing styles keep its inner capitals.
@inproceedings{Yin2018StructVAETL,
  title={{StructVAE}: Tree-structured Latent Variable Models for Semi-supervised Semantic Parsing},
  author={Pengcheng Yin and
          Chunting Zhou and
          Junxian He and
          Graham Neubig},
  publisher={Annual Meeting of the Association for Computational Linguistics},
  year={2018}
}
% Conference paper: typed inproceedings with the venue in publisher, as the other conference entries here do.
@inproceedings{Aharoni2017TowardsSN,
  title={Towards String-To-Tree Neural Machine Translation},
  author={Roee Aharoni and
          Yoav Goldberg},
  publisher={Annual Meeting of the Association for Computational Linguistics},
  year={2017}
}

@inproceedings{Bastings2017GraphCE,
  author    = {Jasmijn Bastings and Ivan Titov and Wilker Aziz and
               Diego Marcheggiani and Khalil Sima'an},
  title     = {Graph Convolutional Encoders for Syntax-aware Neural Machine Translation},
  year      = {2017},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
}

% Conference paper: typed inproceedings with the venue in publisher, as the other conference entries here do.
@inproceedings{KoncelKedziorski2019TextGF,
  title={Text Generation from Knowledge Graphs with Graph Transformers},
  author={Rik Koncel-Kedziorski and
          Dhanush Bekal and
          Yi Luan and
          Mirella Lapata and
          Hannaneh Hajishirzi},
  publisher={Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  year={2019}
}

@article{Kovalerchuk2020SurveyOE,
  author  = {Boris Kovalerchuk and Muhammad Ahmad and Ankur Teredesai},
  title   = {Survey of explainable machine learning with visual and granular methods beyond quasi-explanations},
  journal = {ArXiv},
  volume  = {abs/2009.10221},
  year    = {2020},
}

@article{DoshiVelez2017TowardsAR,
  author  = {Finale Doshi-Velez and Been Kim},
  title   = {Towards A Rigorous Science of Interpretable Machine Learning},
  year    = {2017},
  journal = {arXiv preprint arXiv:1702.08608},
}

% Proper nouns are braced so sentence-casing styles keep their capitals.
@inproceedings{Dozat2016IncorporatingNM,
  title={Incorporating {Nesterov} Momentum into {Adam}},
  author={Timothy Dozat},
  publisher={International Conference on Learning Representations},
  year={2016}
}

% The optimizer name is braced so sentence-casing styles keep its capital.
@inproceedings{Reddi2018OnTC,
  author    = {Sashank J. Reddi and
               Satyen Kale and
               Sanjiv Kumar},
  title     = {On the Convergence of {Adam} and Beyond},
  publisher = {International Conference on Learning Representations},
  year      = {2018}
}

% Space restored after the colon; the acronym is braced so sentence-casing styles keep its capitals.
@article{Zeiler2012ADADELTAAA,
  author    = {Matthew D. Zeiler},
  title     = {{ADADELTA}: An Adaptive Learning Rate Method},
  journal   = {arXiv preprint arXiv:1212.5701},
  year      = {2012}
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% chapter 9------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% chapter 10------------------------------------------------------
% Venue uses the same conference name as the other Neural Information Processing Systems entries in this file.
@inproceedings{vaswani2017attention,
	title={Attention is All You Need},
	author={Ashish {Vaswani} and Noam {Shazeer} and Niki {Parmar} and Jakob {Uszkoreit} and Llion {Jones} and Aidan N. {Gomez} and Lukasz {Kaiser} and Illia {Polosukhin}},
	publisher={Conference on Neural Information Processing Systems},
	pages={5998--6008},
	year={2017}
}

@inproceedings{DBLP:conf/acl/LiLWJXZLL20,
  author = {Bei Li and Hui Liu and Ziyang Wang and Yufan Jiang and
            Tong Xiao and Jingbo Zhu and Tongran Liu and Changliang Li},
  title = {Does Multi-Encoder Help? {A} Case Study on Context-Aware Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2020},
  pages = {3512--3518},
}

% EMNLP 2016 paper (see the DBLP key); the conference name is kept in publisher as in the rest of this file.
@inproceedings{DBLP:conf/emnlp/MiWI16,
  author    = {Haitao Mi and
               Zhiguo Wang and
               Abe Ittycheriah},
  title     = {Supervised Attentions for Neural Machine Translation},
  pages     = {2283--2288},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2016}
}

% COLING 2016 paper (see the DBLP key); the conference name is kept in publisher as in the rest of this file.
@inproceedings{DBLP:conf/coling/LiuUFS16,
  author    = {Lemao Liu and
               Masao Utiyama and
               Andrew M. Finch and
               Eiichiro Sumita},
  title     = {Neural Machine Translation with Supervised Attention},
  pages     = {3093--3102},
  publisher = {International Conference on Computational Linguistics},
  year      = {2016}
}

@inproceedings{devlin-etal-2014-fast,
  author = {Jacob Devlin and Rabih Zbib and Zhongqiang Huang and
            Thomas Lamar and Richard M. Schwartz and John Makhoul},
  title = {Fast and Robust Neural Network Joint Models for Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2014},
  pages = {1370--1380},
}
@inproceedings{Schwenk_continuousspace,
  title = {Continuous Space Translation Models for Phrase-Based Statistical Machine Translation},
  author = {Holger Schwenk},
  publisher = {International Conference on Computational Linguistics},
  year = {2012},
  pages = {1071--1080},
}
% NOTE(review): venue set to EMNLP 2013 from the published record - confirm against the ACL Anthology.
@inproceedings{kalchbrenner-blunsom-2013-recurrent,
  author    = {Nal Kalchbrenner and
               Phil Blunsom},
  title     = {Recurrent Continuous Translation Models},
  pages     = {1700--1709},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2013}
}
@article{HochreiterThe,
  title = {The Vanishing Gradient Problem During Learning Recurrent Neural Nets and Problem Solutions},
  author = {Sepp Hochreiter},
  journal = {International Journal of Uncertainty, Fuzziness and Knowledge-Based Systems},
  year = {1998},
  volume = {6},
  number = {2},
  pages = {107--116},
}
% Journal name spelled out correctly: Transactions on, not Transportation.
@article{BENGIO1994Learning,
  author    = {Yoshua Bengio and
               Patrice Y. Simard and
               Paolo Frasconi},
  title     = {Learning long-term dependencies with gradient descent is difficult},
  journal   = {IEEE Transactions on Neural Networks},
  volume    = {5},
  number    = {2},
  pages     = {157--166},
  year      = {1994}
}

% Page range uses the double hyphen BibTeX expects.
@article{StahlbergNeural,
  title={Neural Machine Translation: A Review},
  author={Felix Stahlberg},
  journal={Journal of Artificial Intelligence Research},
  year={2020},
  volume={69},
  pages={343--418}
}
% NOTE(review): venue set to EMNLP 2016 from the published record - confirm against the ACL Anthology.
@inproceedings{Bentivogli2016NeuralVP,
  author    = {Luisa Bentivogli and
               Arianna Bisazza and
               Mauro Cettolo and
               Marcello Federico},
  title     = {Neural versus Phrase-Based Machine Translation Quality: a Case Study},
  pages     = {257--267},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2016}
}
@article{Hassan2018AchievingHP,
  author = {Hany Hassan and Anthony Aue and Chang Chen and Vishal Chowdhary and
            Jonathan Clark and Christian Federmann and Xuedong Huang and
            Marcin Junczys-Dowmunt and William Lewis and Mu Li and Shujie Liu and
            Tie-Yan Liu and Renqian Luo and Arul Menezes and Tao Qin and
            Frank Seide and Xu Tan and Fei Tian and Lijun Wu and Shuangzhi Wu and
            Yingce Xia and Dongdong Zhang and Zhirui Zhang and Ming Zhou},
  title = {Achieving Human Parity on Automatic Chinese to English News Translation},
  year = {2018},
  journal = {CoRR},
  volume = {abs/1803.05567}
}
@inproceedings{WangLearning,
  author = {Qiang Wang and Bei Li and Tong Xiao and Jingbo Zhu and
            Changliang Li and Derek F. Wong and Lidia S. Chao},
  title = {Learning Deep Transformer Models for Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2019},
  pages = {1810--1822},
}
@inproceedings{Li2020NeuralMT,
  author = {Yanyang Li and Qiang Wang and Tong Xiao and Tongran Liu and Jingbo Zhu},
  title = {Neural Machine Translation with Joint Representation},
  publisher = {AAAI Conference on Artificial Intelligence},
  year = {2020},
  pages = {8285--8292},
}
% Page range written out in full with the double hyphen BibTeX expects.
@article{HochreiterLong,
  author = {Hochreiter, Sepp and Schmidhuber, Jürgen},
  year = {1997},
  month = {12},
  pages = {1735--1780},
  title = {Long Short-term Memory},
  volume = {9},
  journal = {Neural Computation}
}
% NOTE(review): venue set to EMNLP 2014 from the published record - confirm against the ACL Anthology.
@inproceedings{Cho2014Learning,
  author    = {Kyunghyun Cho and
               Bart van Merrienboer and
               {\c{C}}aglar G{\"{u}}l{\c{c}}ehre and
               Dzmitry Bahdanau and
               Fethi Bougares and
               Holger Schwenk and
               Yoshua Bengio},
  title     = {Learning Phrase Representations using {RNN} Encoder-Decoder for Statistical
               Machine Translation},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {1724--1734},
  year      = {2014}
}
@inproceedings{pmlr-v9-glorot10a,
  title = {Understanding the difficulty of training deep feedforward neural networks},
  author = {Xavier Glorot and Yoshua Bengio},
  year = {2010},
  volume = {9},
  pages = {249--256},
  publisher = {International Conference on Artificial Intelligence and Statistics},
}
@inproceedings{xiao2017fast,
  title = {Fast Parallel Training of Neural Language Models},
  author = {Tong Xiao and Jingbo Zhu and Tongran Liu and Chunliang Zhang},
  year = {2017},
  pages = {4193--4199},
  publisher = {International Joint Conference on Artificial Intelligence},
}
@inproceedings{Gu2017NonAutoregressiveNM,
  title = {Non-Autoregressive Neural Machine Translation},
  author = {Jiatao Gu and James Bradbury and Caiming Xiong and Victor O. K. Li and Richard Socher},
  year = {2018},
  publisher = {International Conference on Learning Representations},
}
@inproceedings{li-etal-2018-simple,
  title = {A Simple and Effective Approach to Coverage-Aware Neural Machine Translation},
  author = {Yanyang Li and Tong Xiao and Yinqiao Li and
            Qiang Wang and Changming Xu and Jingbo Zhu},
  year = {2018},
  pages = {292--297},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
}
@inproceedings{TuModeling,
  title = {Modeling Coverage for Neural Machine Translation},
  author = {Zhaopeng Tu and Zhengdong Lu and Yang Liu and Xiaohua Liu and Hang Li},
  year = {2016},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
}
@inproceedings{DBLP:journals/corr/SennrichFCBHHJL17,
  title = {Nematus: a Toolkit for Neural Machine Translation},
  author = {Rico Sennrich and Orhan Firat and Kyunghyun Cho and Barry Haddow and
            Alexandra Birch and Julian Hitschler and Marcin Junczys-Dowmunt and
            Samuel L{\"{a}}ubli and Antonio Valerio Miceli Barone and
            Jozef Mokry and Maria Nadejde},
  year = {2017},
  pages = {65--68},
  publisher = {European Association of Computational Linguistics},
}
@inproceedings{DBLP:journals/corr/abs-1905-13324,
  title = {A Lightweight Recurrent Network for Sequence Modeling},
  author = {Biao Zhang and Rico Sennrich},
  year = {2019},
  pages = {1538--1548},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
}
% Acronyms are braced so sentence-casing styles keep their capitals.
@article{Lei2017TrainingRA,
  author    = {Tao Lei and
               Yu Zhang and
               Yoav Artzi},
  title     = {Training {RNNs} as Fast as {CNNs}},
  journal   = {CoRR},
  volume    = {abs/1709.02755},
  year      = {2017}
}
@inproceedings{Zhang2018SimplifyingNM,
  title = {Simplifying Neural Machine Translation with Addition-Subtraction Twin-Gated Recurrent Networks},
  author = {Biao Zhang and Deyi Xiong and Jinsong Su and Qian Lin and Huiji Zhang},
  year = {2018},
  pages = {4273--4283},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
}
@inproceedings{Liu_2019_CVPR,
  title = {End-To-End Multi-Task Learning With Attention},
  author = {Shikun Liu and Edward Johns and Andrew J. Davison},
  year = {2019},
  pages = {1871--1880},
  publisher = {IEEE Conference on Computer Vision and Pattern Recognition},
}
@inproceedings{DBLP:journals/corr/abs-1811-00498,
  title = {Multilingual {NMT} with a Language-Independent Attention Bridge},
  author = {Ra{\'{u}}l V{\'{a}}zquez and Alessandro Raganato and J{\"{o}}rg Tiedemann and Mathias Creutz},
  year = {2019},
  pages = {33--39},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
}
@inproceedings{MoradiInterrogating,
  title = {Interrogating the Explanatory Power of Attention in Neural Machine Translation},
  author = {Pooya Moradi and Nishant Kambhatla and Anoop Sarkar},
  year = {2019},
  pages = {221--230},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
}
@inproceedings{WangNeural,
  title = {Neural Machine Translation Advised by Statistical Machine Translation},
  author = {Xing Wang and Zhengdong Lu and Zhaopeng Tu and
            Hang Li and Deyi Xiong and Min Zhang},
  year = {2017},
  pages = {3330--3336},
  publisher = {AAAI Conference on Artificial Intelligence},
}
@inproceedings{Xiao2019SharingAW,
  title = {Sharing Attention Weights for Fast Transformer},
  author = {Tong Xiao and Yinqiao Li and Jingbo Zhu and Zhengtao Yu and Tongran Liu},
  year = {2019},
  pages = {5292--5298},
  publisher = {International Joint Conference on Artificial Intelligence},
}
@inproceedings{Yang2017TowardsBH,
  title = {Towards Bidirectional Hierarchical Representations for Attention-based Neural Machine Translation},
  author = {Baosong Yang and Derek F. Wong and Tong Xiao and Lidia S. Chao and Jingbo Zhu},
  year = {2017},
  pages = {1432--1441},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
}
@inproceedings{Wang2019TreeTI,
  author    = {Yau-Shian Wang and Hung-yi Lee and Yun-Nung Chen},
  title     = {Tree Transformer: Integrating Tree Structures into Self-Attention},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {1061--1070},
  year      = {2019},
}
@inproceedings{DBLP:journals/corr/abs-1809-01854,
  author    = {Jetic Gu and Hassan S. Shavarani and Anoop Sarkar},
  title     = {Top-down Tree Structured Decoding with Syntactic Connections for
               Neural Machine Translation and Parsing},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {401--413},
  year      = {2018},
}
@inproceedings{DBLP:journals/corr/abs-1808-09374,
  author    = {Xinyi Wang and Hieu Pham and Pengcheng Yin and Graham Neubig},
  title     = {A Tree-based Decoder for Neural Machine Translation},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {4772--4777},
  year      = {2018},
}
@article{DBLP:journals/corr/ZhangZ16c,
  author    = {Jiajun Zhang and Chengqing Zong},
  title     = {Bridging Neural Machine Translation and Bilingual Dictionaries},
  journal   = {CoRR},
  volume    = {abs/1610.07272},
  year      = {2016},
}
% Conference paper: stored as inproceedings with the venue in `publisher`,
% matching the other conference entries in this file.
@inproceedings{Dai2019TransformerXLAL,
  author    = {Zihang Dai and Zhilin Yang and Yiming Yang and
               Jaime G. Carbonell and Quoc V. Le and Ruslan Salakhutdinov},
  title     = {{Transformer-XL}: Attentive Language Models Beyond a Fixed-Length Context},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {2978--2988},
  year      = {2019},
}
@inproceedings{li-etal-2019-word,
  author    = {Xintong Li and Guanlin Li and Lemao Liu and Max Meng and Shuming Shi},
  title     = {On the Word Alignment from Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {1293--1303},
  year      = {2019},
}

@inproceedings{Werlen2018DocumentLevelNM,
  author    = {Lesly Miculicich Werlen and Dhananjay Ram and
               Nikolaos Pappas and James Henderson},
  title     = {Document-Level Neural Machine Translation with Hierarchical Attention Networks},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {2947--2954},
  year      = {2018},
}
@inproceedings{DBLP:journals/corr/abs-1805-10163,
  author    = {Elena Voita and Pavel Serdyukov and Rico Sennrich and Ivan Titov},
  title     = {Context-Aware Neural Machine Translation Learns Anaphora Resolution},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {1264--1274},
  year      = {2018},
}
@article{DBLP:journals/corr/abs-1906-00532,
  author    = {Aishwarya Bhandare and Vamsi Sripathi and Deepthi Karkada and Vivek Menon and
               Sun Choi and Kushal Datta and Vikram Saletore},
  title     = {Efficient 8-Bit Quantization of Transformer Neural Machine Language Translation Model},
  journal   = {CoRR},
  volume    = {abs/1906.00532},
  year      = {2019},
}

@inproceedings{Zhang2018SpeedingUN,
  author    = {Wen Zhang and Liang Huang and Yang Feng and Lei Shen and Qun Liu},
  title     = {Speeding Up Neural Machine Translation Decoding by Cube Pruning},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {4284--4294},
  year      = {2018},
}
% Venue corrected: this paper appeared at CoNLL 2016, not COLING.
@inproceedings{DBLP:journals/corr/SeeLM16,
  author    = {Abigail See and
               Minh-Thang Luong and
               Christopher D. Manning},
  title     = {Compression of Neural Machine Translation Models via Pruning},
  publisher = {Conference on Computational Natural Language Learning},
  pages     = {291--301},
  year      = {2016}
}
@inproceedings{DBLP:journals/corr/ChenLCL17,
  author    = {Yun Chen and Yang Liu and Yong Cheng and Victor O. K. Li},
  title     = {A Teacher-Student Framework for Zero-Resource Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {1925--1935},
  year      = {2017},
}
@article{Hinton2015Distilling,
  author    = {Geoffrey Hinton and Oriol Vinyals and Jeffrey Dean},
  title     = {Distilling the Knowledge in a Neural Network},
  journal   = {CoRR},
  volume    = {abs/1503.02531},
  year      = {2015},
}

% Venue corrected: published in the proceedings of the Third Conference on
% Machine Translation (WMT 2018), not the ACL annual meeting.
@inproceedings{Ott2018ScalingNM,
  author    = {Myle Ott and Sergey Edunov and David Grangier and Michael Auli},
  title     = {Scaling Neural Machine Translation},
  publisher = {Conference on Machine Translation},
  pages     = {1--9},
  year      = {2018}
}
@inproceedings{Lin2020TowardsF8,
  author    = {Ye Lin and Yanyang Li and Tengbo Liu and
               Tong Xiao and Tongran Liu and Jingbo Zhu},
  title     = {Towards Fully 8-bit Integer Inference for the Transformer Model},
  publisher = {International Joint Conference on Artificial Intelligence},
  pages     = {3759--3765},
  year      = {2020},
}
@inproceedings{kim-rush-2016-sequence,
  author    = {Yoon Kim and Alexander M. Rush},
  title     = {Sequence-Level Knowledge Distillation},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {1317--1327},
  year      = {2016},
}
% Year corrected to the 1969 publication date (as in the citation key);
% the issue number is split out of `volume` into `number`.
@article{Akaike1969autoregressive,
  author    = {Hirotugu Akaike},
  title     = {Fitting autoregressive models for prediction},
  journal   = {Annals of the Institute of Statistical Mathematics},
  volume    = {21},
  number    = {1},
  pages     = {243--247},
  year      = {1969},
}
@inproceedings{Chen2018TheBO,
  author    = {Mia Xu Chen and Orhan Firat and Ankur Bapna and Melvin Johnson and
               Wolfgang Macherey and George F. Foster and Llion Jones and Mike Schuster and
               Noam Shazeer and Niki Parmar and Ashish Vaswani and Jakob Uszkoreit and
               Lukasz Kaiser and Zhifeng Chen and Yonghui Wu and Macduff Hughes},
  title     = {The Best of Both Worlds: Combining Recent Advances in Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {76--86},
  year      = {2018},
}
@inproceedings{He2018LayerWiseCB,
  author    = {Tianyu He and Xu Tan and Yingce Xia and Di He and
               Tao Qin and Zhibo Chen and Tie-Yan Liu},
  title     = {Layer-Wise Coordination between Encoder and Decoder for Neural Machine Translation},
  publisher = {Conference on Neural Information Processing Systems},
  year      = {2018},
}
% Venue corrected: published at SSST-8 (Eighth Workshop on Syntax, Semantics
% and Structure in Statistical Translation), not the ACL annual meeting.
@inproceedings{cho-etal-2014-properties,
  author    = {Kyunghyun Cho and
               Bart van Merrienboer and
               Dzmitry Bahdanau and
               Yoshua Bengio},
  title     = {On the Properties of Neural Machine Translation: Encoder-Decoder Approaches},
  publisher = {Workshop on Syntax, Semantics and Structure in Statistical Translation},
  pages     = {103--111},
  year      = {2014}
}

@inproceedings{DBLP:conf/acl/JeanCMB15,
  author    = {S{\'{e}}bastien Jean and KyungHyun Cho and Roland Memisevic and Yoshua Bengio},
  title     = {On Using Very Large Target Vocabulary for Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {1--10},
  year      = {2015},
}

@inproceedings{DBLP:journals/corr/LuongPM15,
  author    = {Thang Luong and Hieu Pham and Christopher D. Manning},
  title     = {Effective Approaches to Attention-based Neural Machine Translation},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {1412--1421},
  year      = {2015},
}
@inproceedings{He2016ImprovedNM,
  author    = {Wei He and Zhongjun He and Hua Wu and Haifeng Wang},
  title     = {Improved Neural Machine Translation with {SMT} Features},
  publisher = {AAAI Conference on Artificial Intelligence},
  pages     = {151--157},
  year      = {2016},
}
@inproceedings{zhang-etal-2017-prior,
  author    = {Zhang, Jiacheng and Liu, Yang and Luan, Huanbo and
               Xu, Jingfang and Sun, Maosong},
  title     = {Prior Knowledge Integration for Neural Machine Translation using Posterior Regularization},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {1514--1523},
  year      = {2017},
}

@inproceedings{duan-etal-2020-bilingual,
  author    = {Xiangyu Duan and Baijun Ji and Hao Jia and Min Tan and
               Min Zhang and Boxing Chen and Weihua Luo and Yue Zhang},
  title     = {Bilingual Dictionary Based Neural Machine Translation without Using Parallel Sentences},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {1570--1579},
  year      = {2020},
}

@inproceedings{cao-xiong-2018-encoding,
  author    = {Qian Cao and Deyi Xiong},
  title     = {Encoding Gated Translation Memory into Neural Machine Translation},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {3042--3047},
  year      = {2018},
}
@inproceedings{yang-etal-2016-hierarchical,
  author    = {Zichao Yang and Diyi Yang and Chris Dyer and
               Xiaodong He and Alexander J. Smola and Eduard H. Hovy},
  title     = {Hierarchical Attention Networks for Document Classification},
  publisher = {Proceedings of the Human Language Technology Conference of the North American Chapter of the Association for Computational Linguistics},
  pages     = {1480--1489},
  year      = {2016},
}
%%%%% chapter 10------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% chapter 11------------------------------------------------------
@article{DBLP:journals/pami/RenHG017,
  author    = {Shaoqing Ren and Kaiming He and Ross Girshick and Jian Sun},
  title     = {Faster {R-CNN:} Towards Real-Time Object Detection with Region Proposal Networks},
  journal   = {{IEEE} Transactions on Pattern Analysis and Machine Intelligence},
  volume    = {39},
  number    = {6},
  pages     = {1137--1149},
  year      = {2017},
}

@inproceedings{DBLP:conf/eccv/LiuAESRFB16,
  author    = {Wei Liu and Dragomir Anguelov and Dumitru Erhan and Christian Szegedy and
               Scott Reed and Cheng-Yang Fu and Alexander C. Berg},
  title     = {{SSD:} Single Shot MultiBox Detector},
  publisher = {European Conference on Computer Vision},
  volume    = {9905},
  pages     = {21--37},
  year      = {2016},
}

@inproceedings{devlin-etal-2014-fast,
  author    = {Jacob Devlin and Rabih Zbib and Zhongqiang Huang and
               Thomas Lamar and Richard M. Schwartz and John Makhoul},
  title     = {Fast and Robust Neural Network Joint Models for Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {1370--1380},
  year      = {2014},
}

@inproceedings{DBLP:conf/acl/WangLLJL15,
  author    = {Mingxuan Wang and
               Zhengdong Lu and
               Hang Li and
               Wenbin Jiang and
               Qun Liu},
  title     = {{genCNN}: {A} Convolutional Architecture for Word Sequence Prediction},
  pages     = {1567--1576},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2015}
}

@inproceedings{DBLP:conf/icassp/ZhangCJ17,
  author    = {Yu Zhang and William Chan and Navdeep Jaitly},
  title     = {Very deep convolutional networks for end-to-end speech recognition},
  publisher = {International Conference on Acoustics, Speech and Signal Processing},
  pages     = {4845--4849},
  year      = {2017},
}

@inproceedings{DBLP:conf/icassp/DengAY13,
  author    = {Li Deng and Ossama Abdel-Hamid and Dong Yu},
  title     = {A deep convolutional neural network using heterogeneous pooling for trading acoustic invariance with phonetic confusion},
  publisher = {International Conference on Acoustics, Speech and Signal Processing},
  pages     = {6669--6673},
  year      = {2013},
}

% The entry DBLP:journals/corr/LuongPM15 (Luong, Pham and Manning, 2015) is
% already defined earlier in this file with identical fields. The repeated
% definition that stood here was removed because BibTeX reports a second
% entry with the same key as a "Repeated entry" error.

@inproceedings{DBLP:conf/acl-codeswitch/WangCK18,
  author    = {Changhan Wang and Kyunghyun Cho and Douwe Kiela},
  title     = {Code-Switched Named Entity Recognition with Embedding Attention},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {154--158},
  year      = {2018},
}

@inproceedings{DBLP:conf/emnlp/LiDWCM17,
  author    = {Peng-Hsuan Li and Ruo-Ping Dong and Yu-Siang Wang and
               Ju-Chieh Chou and Wei-Yun Ma},
  title     = {Leveraging Linguistic Structures for Named Entity Recognition with Bidirectional Recursive Neural Networks},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {2664--2669},
  year      = {2017},
}

@inproceedings{DBLP:conf/acl/MaH16,
  author    = {Xuezhe Ma and
               Eduard H. Hovy},
  title     = {End-to-end Sequence Labeling via Bi-directional {LSTM-CNNs-CRF}},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {1064--1074},
  year      = {2016}
}

@inproceedings{DBLP:conf/emnlp/StrubellVBM17,
  author    = {Emma Strubell and
               Patrick Verga and
               David Belanger and
               Andrew McCallum},
  title     = {Fast and Accurate Entity Recognition with Iterated Dilated Convolutions},
  pages     = {2670--2680},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2017}
}

@inproceedings{DBLP:conf/cncl/ZhouZXQBX17,
  author    = {Peng Zhou and Suncong Zheng and Jiaming Xu and
               Zhenyu Qi and Hongyun Bao and Bo Xu},
  title     = {Joint Extraction of Multiple Relations and Entities by Using a Hybrid Neural Network},
  publisher = {Springer},
  volume    = {10565},
  pages     = {135--146},
  year      = {2017},
}

@article{2011Natural,
  title={Natural Language Processing (almost) from Scratch},
  author={Collobert, Ronan and Weston, Jason and Bottou, Léon and Karlen, Michael and Kavukcuoglu, Koray and Kuksa, Pavel},
  journal={Journal of Machine Learning Research},
  volume={12},
  number={1},
  pages={2493--2537},
  year={2011},
}
@inproceedings{DBLP:conf/acl/NguyenG15,
  author    = {Thien Huu Nguyen and Ralph Grishman},
  title     = {Event Detection and Domain Adaptation with Convolutional Neural Networks},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {365--371},
  year      = {2015},
}

@inproceedings{DBLP:conf/aaai/LaiXLZ15,
  author    = {Siwei Lai and Liheng Xu and Kang Liu and Jun Zhao},
  title     = {Recurrent Convolutional Neural Networks for Text Classification},
  publisher = {AAAI Conference on Artificial Intelligence},
  pages     = {2267--2273},
  year      = {2015},
}

@inproceedings{DBLP:conf/acl/ChenXLZ015,
  author    = {Yubo Chen and Liheng Xu and Kang Liu and Daojian Zeng and Jun Zhao},
  title     = {Event Extraction via Dynamic Multi-Pooling Convolutional Neural Networks},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {167--176},
  year      = {2015},
}

@inproceedings{DBLP:conf/emnlp/LeiBJ15,
  author    = {Tao Lei and
               Regina Barzilay and
               Tommi S. Jaakkola},
  title     = {Molding {CNNs} for text: non-linear, non-consecutive convolutions},
  pages     = {1565--1575},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2015}
}

@inproceedings{DBLP:conf/naacl/Johnson015,
  author    = {Rie Johnson and Tong Zhang},
  title     = {Effective Use of Word Order for Text Categorization with Convolutional Neural Networks},
  publisher = {Proceedings of the Human Language Technology Conference of the North American Chapter of the Association for Computational Linguistics},
  pages     = {103--112},
  year      = {2015},
}

@inproceedings{DBLP:conf/naacl/NguyenG15,
  author    = {Thien Huu Nguyen and Ralph Grishman},
  title     = {Relation Extraction: Perspective from Convolutional Neural Networks},
  publisher = {Proceedings of the Human Language Technology Conference of the North American Chapter of the Association for Computational Linguistics},
  pages     = {39--48},
  year      = {2015},
}

@article{StahlbergNeural,
  title={Neural Machine Translation: A Review},
  author={Felix Stahlberg},
  journal={Journal of Artificial Intelligence Research},
  year={2020},
  volume={69},
  pages={343--418}
}

% Conference paper: stored as inproceedings with the venue in `publisher`,
% matching the other conference entries in this file.
@inproceedings{Sennrich2016ImprovingNM,
  author    = {Rico Sennrich and
               Barry Haddow and
               Alexandra Birch},
  title     = {Improving Neural Machine Translation Models with Monolingual Data},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {86--96},
  year      = {2016}
}

@inproceedings{bahdanau2014neural,
  author    = {Dzmitry Bahdanau and Kyunghyun Cho and Yoshua Bengio},
  title     = {Neural Machine Translation by Jointly Learning to Align and Translate},
  publisher = {International Conference on Learning Representations},
  year      = {2015},
}

@article{Waibel1989PhonemeRU,
  title={Phoneme recognition using time-delay neural networks},
  author={Alexander Waibel and Toshiyuki Hanazawa and Geoffrey Hinton and Kiyohiro Shikano and Kevin J. Lang},
  journal={IEEE Transactions on Acoustics, Speech, and Signal Processing},
  year={1989},
  volume={37},
  number={3},
  pages={328--339}
}

@article{LeCun1989BackpropagationAT,
  title={Backpropagation Applied to Handwritten Zip Code Recognition},
  author={Yann Lecun and Bernhard Boser and John Denker and Don Henderson and Richard E. Howard and Wayne E. Hubbard and Larry Jackel},
  journal={Neural Computation},
  year={1989},
  volume={1},
  number={4},
  pages={541--551}
}

@article{726791,
  author={Yann {Lecun} and Leon {Bottou} and Yoshua {Bengio} and Patrick {Haffner}},
  journal={Proceedings of the IEEE},
  title={Gradient-based learning applied to document recognition},
  year={1998},
  volume={86},
  number={11},
  pages={2278--2324}
}

@inproceedings{DBLP:journals/corr/HeZRS15,
  author    = {Kaiming He and Xiangyu Zhang and Shaoqing Ren and Jian Sun},
  title     = {Deep Residual Learning for Image Recognition},
  publisher = {{IEEE} Conference on Computer Vision and Pattern Recognition},
  pages     = {770--778},
  year      = {2016},
}

@inproceedings{DBLP:conf/cvpr/HuangLMW17,
  author    = {Gao Huang and Zhuang Liu and Laurens van der Maaten and Kilian Q. Weinberger},
  title     = {Densely Connected Convolutional Networks},
  publisher = {{IEEE} Conference on Computer Vision and Pattern Recognition},
  pages     = {2261--2269},
  year      = {2017},
}

% Conference paper: stored as inproceedings with the venue in `publisher`,
% matching the other conference entries in this file.
@inproceedings{Girshick2015FastR,
  author    = {Ross Girshick},
  title     = {Fast {R-CNN}},
  publisher = {International Conference on Computer Vision},
  pages     = {1440--1448},
  year      = {2015}
}

% Conference paper: stored as inproceedings with the venue in `publisher`,
% matching the other conference entries in this file.
@inproceedings{He2020MaskR,
  author    = {Kaiming He and Georgia Gkioxari and Piotr Doll{\'a}r and Ross B. Girshick},
  title     = {Mask {R-CNN}},
  publisher = {International Conference on Computer Vision},
  pages     = {2961--2969},
  year      = {2017}
}

@inproceedings{Kalchbrenner2014ACN,
  author    = {Nal Kalchbrenner and Edward Grefenstette and Phil Blunsom},
  title     = {A Convolutional Neural Network for Modelling Sentences},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {655--665},
  year      = {2014},
}

@inproceedings{Kim2014ConvolutionalNN,
  author    = {Yoon Kim},
  title     = {Convolutional Neural Networks for Sentence Classification},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {1746--1751},
  year      = {2014},
}

@inproceedings{Ma2015DependencybasedCN,
  author    = {Mingbo Ma and Liang Huang and Bowen Zhou and Bing Xiang},
  title     = {Dependency-based Convolutional Neural Networks for Sentence Embedding},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {174--179},
  year      = {2015},
}

@inproceedings{Santos2014DeepCN,
  author    = {C{\'{\i}}cero Nogueira dos Santos and Maira Gatti},
  title     = {Deep Convolutional Neural Networks for Sentiment Analysis of Short Texts},
  publisher = {International Conference on Computational Linguistics},
  pages     = {69--78},
  year      = {2014},
}

@inproceedings{Dauphin2017LanguageMW,
  author    = {Yann N. Dauphin and Angela Fan and Michael Auli and David Grangier},
  title     = {Language Modeling with Gated Convolutional Networks},
  publisher = {International Conference on Machine Learning},
  volume    = {70},
  pages     = {933--941},
  year      = {2017},
}

@inproceedings{Gehring2017ACE,
  author    = {Jonas Gehring and Michael Auli and David Grangier and Yann N. Dauphin},
  title     = {A Convolutional Encoder Model for Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {123--135},
  year      = {2017},
}

@inproceedings{DBLP:journals/corr/GehringAGYD17,
  author    = {Jonas Gehring and Michael Auli and David Grangier and
               Denis Yarats and Yann N. Dauphin},
  title     = {Convolutional Sequence to Sequence Learning},
  publisher = {International Conference on Machine Learning},
  volume    = {70},
  pages     = {1243--1252},
  year      = {2017},
}

% Conference paper: stored as inproceedings with the venue in `publisher`,
% matching the other conference entries in this file.
@inproceedings{Kaiser2018DepthwiseSC,
  author    = {Lukasz Kaiser and
               Aidan N. Gomez and
               Fran{\c{c}}ois Chollet},
  title     = {Depthwise Separable Convolutions for Neural Machine Translation},
  publisher = {International Conference on Learning Representations},
  year      = {2018},
}

@inproceedings{Wu2019PayLA,
  author    = {Felix Wu and Angela Fan and Alexei Baevski and Yann N. Dauphin and Michael Auli},
  title     = {Pay Less Attention with Lightweight and Dynamic Convolutions},
  publisher = {International Conference on Learning Representations},
  year      = {2019},
}

@inproceedings{kalchbrenner-blunsom-2013-recurrent,
  author    = {Nal Kalchbrenner and Phil Blunsom},
  title     = {Recurrent Continuous Translation Models},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {1700--1709},
  year      = {2013},
}

@article{Wu2016GooglesNM,
  author    = {Yonghui Wu and Mike Schuster and Zhifeng Chen and Quoc V. Le and
               Mohammad Norouzi and Wolfgang Macherey and Maxim Krikun and Yuan Cao and
               Qin Gao and Klaus Macherey and Jeff Klingner and Apurva Shah and
               Melvin Johnson and Xiaobing Liu and Lukasz Kaiser and Stephan Gouws and
               Yoshikiyo Kato and Taku Kudo and Hideto Kazawa and Keith Stevens and
               George Kurian and Nishant Patil and Wei Wang and Cliff Young and
               Jason Smith and Jason Riesa and Alex Rudnick and Oriol Vinyals and
               Greg Corrado and Macduff Hughes and Jeffrey Dean},
  title     = {{Google}'s Neural Machine Translation System: Bridging the Gap between Human and Machine Translation},
  journal   = {CoRR},
  volume    = {abs/1609.08144},
  year      = {2016}
}

% The entry DBLP:journals/corr/HeZRS15 (He, Zhang, Ren and Sun, 2016) is
% already defined earlier in this chapter's section. The repeated definition
% that stood here was removed because BibTeX reports a second entry with the
% same key as a "Repeated entry" error.

@inproceedings{Sukhbaatar2015EndToEndMN,
  author    = {Sainbayar Sukhbaatar and Arthur Szlam and Jason Weston and Rob Fergus},
  title     = {End-To-End Memory Networks},
  publisher = {Conference on Neural Information Processing Systems},
  pages     = {2440--2448},
  year      = {2015},
}

@inproceedings{Islam2020HowMP,
  author    = {Md. Amirul Islam and Sen Jia and Neil Bruce},
  title     = {How much Position Information Do Convolutional Neural Networks Encode?},
  publisher = {International Conference on Learning Representations},
  year      = {2020},
}
@inproceedings{Sutskever2013OnTI,
  author    = {Ilya Sutskever and James Martens and George E. Dahl and Geoffrey Hinton},
  title     = {On the importance of initialization and momentum in deep learning},
  publisher = {International Conference on Machine Learning},
  pages     = {1139--1147},
  year      = {2013},
}

% Venue corrected: this is an ICASSP 2013 conference paper, not an article in
% the IEEE Transactions journal; stored like the other ICASSP entries here.
@inproceedings{Bengio2013AdvancesIO,
  author    = {Yoshua Bengio and Nicolas Boulanger-Lewandowski and Razvan Pascanu},
  title     = {Advances in optimizing recurrent networks},
  publisher = {International Conference on Acoustics, Speech and Signal Processing},
  pages     = {8624--8628},
  year      = {2013}
}

@article{JMLR:v15:srivastava14a,
  author  = {Nitish Srivastava and Geoffrey Hinton and Alex Krizhevsky and Ilya Sutskever and Ruslan Salakhutdinov},
  title   = {Dropout: A Simple Way to Prevent Neural Networks from Overfitting},
  journal = {Journal of Machine Learning Research},
  year    = {2014},
  volume  = {15},
  pages   = {1929--1958},
}

% Conference paper: stored as inproceedings with the venue in `publisher`,
% matching the other conference entries in this file.
@inproceedings{Chollet2017XceptionDL,
  author    = {Fran{\c{c}}ois Chollet},
  title     = {Xception: Deep Learning with Depthwise Separable Convolutions},
  publisher = {{IEEE} Conference on Computer Vision and Pattern Recognition},
  pages     = {1800--1807},
  year      = {2017}
}

@article{Howard2017MobileNetsEC,
  author    = {Andrew Howard and Menglong Zhu and Bo Chen and Dmitry Kalenichenko and
               Weijun Wang and Tobias Weyand and Marco Andreetto and Hartwig Adam},
  title     = {{MobileNets}: Efficient Convolutional Neural Networks for Mobile Vision Applications},
  journal   = {CoRR},
  volume    = {abs/1704.04861},
  year      = {2017},
}

@article{sifre2014rigid,
  author    = {Sifre, Laurent and Mallat, St{\'e}phane},
  title     = {Rigid-motion scattering for image classification},
  journal   = {Citeseer},
  year      = {2014},
}

% Conference paper: stored as inproceedings with the venue in `publisher`,
% matching the other conference entries in this file.
@inproceedings{Taigman2014DeepFaceCT,
  author    = {Yaniv Taigman and Ming Yang and Marc'Aurelio Ranzato and Lior Wolf},
  title     = {{DeepFace}: Closing the Gap to Human-Level Performance in Face Verification},
  publisher = {{IEEE} Conference on Computer Vision and Pattern Recognition},
  pages     = {1701--1708},
  year      = {2014}
}

@inproceedings{Chen2015LocallyconnectedAC,
  author    = {Yu-hsin Chen and Ignacio Lopez-Moreno and Tara Sainath and
               Mirk{\'{o}} Visontai and Raziel Alvarez and Carolina Parada},
  title     = {Locally-connected and convolutional neural networks for small footprint speaker recognition},
  publisher = {Conference of the International Speech Communication Association},
  pages     = {1136--1140},
  year      = {2015},
}

% Conference paper: stored as inproceedings with the venue in `publisher`,
% matching the other conference entries in this file.
@inproceedings{Chen2020DynamicCA,
  author    = {Yinpeng Chen and Xiyang Dai and Mengchen Liu and
               Dongdong Chen and Lu Yuan and Zicheng Liu},
  title     = {Dynamic Convolution: Attention Over Convolution Kernels},
  publisher = {{IEEE} Conference on Computer Vision and Pattern Recognition},
  pages     = {11027--11036},
  year      = {2020}
}

%%%%% chapter 11------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% chapter 12------------------------------------------------------
@inproceedings{DBLP:conf/coling/ZengLLZZ14,
  author    = {Daojian Zeng and Kang Liu and Siwei Lai and Guangyou Zhou and Jun Zhao},
  title     = {Relation Classification via Convolutional Deep Neural Network},
  publisher = {International Conference on Computational Linguistics},
  pages     = {2335--2344},
  year      = {2014},
}

@inproceedings{DBLP:conf/acl/JohnsonZ17,
  author    = {Rie Johnson and Tong Zhang},
  title     = {Deep Pyramid Convolutional Neural Networks for Text Categorization},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {562--570},
  year      = {2017},
}

@inproceedings{DBLP:conf/interspeech/GulatiQCPZYHWZW20,
  author    = {Anmol Gulati and James Qin and Chung-Cheng Chiu and Niki Parmar and
               Yu Zhang and Jiahui Yu and Wei Han and Shibo Wang and
               Zhengdong Zhang and Yonghui Wu and Ruoming Pang},
  title     = {Conformer: Convolution-augmented Transformer for Speech Recognition},
  publisher = {International Speech Communication Association},
  pages     = {5036--5040},
  year      = {2020},
}

@inproceedings{DBLP:conf/icassp/DongXX18,
  author    = {Linhao Dong and Shuang Xu and Bo Xu},
  title     = {Speech-Transformer: {A} No-Recurrence Sequence-to-Sequence Model for Speech Recognition},
  publisher = {International Conference on Acoustics, Speech and Signal Processing},
  pages     = {5884--5888},
  year      = {2018},
}

@article{DBLP:journals/corr/abs-1802-05751,
  author    = {Niki Parmar and Ashish Vaswani and Jakob Uszkoreit and
               Lukasz Kaiser and Noam Shazeer and Alexander Ku},
  title     = {Image Transformer},
  journal   = {CoRR},
  volume    = {abs/1802.05751},
  year      = {2018},
}

@inproceedings{vaswani2017attention,
  author    = {Ashish {Vaswani} and Noam {Shazeer} and Niki {Parmar} and Jakob {Uszkoreit} and Llion {Jones} and Aidan N. {Gomez} and Lukasz {Kaiser} and Illia {Polosukhin}},
  title     = {Attention is All You Need},
  publisher = {Conference and Workshop on Neural Information Processing Systems},
  pages     = {5998--6008},
  year      = {2017}
}
%----------
%----------
@inproceedings{DBLP:conf/iclr/RaePJHL20,
  title = {Compressive Transformers for Long-Range Sequence Modelling},
  author = {Jack W. Rae and Anna Potapenko and Siddhant M. Jayakumar and Chloe Hillier and Timothy P. Lillicrap},
  publisher = {International Conference on Learning Representations},
  year = {2020}
}

@article{DBLP:journals/corr/abs-2004-05150,
  title = {Longformer: The Long-Document Transformer},
  author = {Iz Beltagy and Matthew E. Peters and Arman Cohan},
  journal = {CoRR},
  volume = {abs/2004.05150},
  year = {2020}
}

@article{DBLP:journals/corr/abs-2005-00743,
  title = {Synthesizer: Rethinking Self-Attention in Transformer Models},
  author = {Yi Tay and Dara Bahri and Donald Metzler and Da-Cheng Juan and Zhe Zhao and Che Zheng},
  journal = {CoRR},
  volume = {abs/2005.00743},
  year = {2020}
}

@inproceedings{DBLP:conf/iclr/WuLLLH20,
  title = {Lite Transformer with Long-Short Range Attention},
  author = {Zhanghao Wu and Zhijian Liu and Ji Lin and Yujun Lin and Song Han},
  publisher = {International Conference on Learning Representations},
  year = {2020}
}

@inproceedings{DBLP:journals/corr/abs-1905-09418,
  title = {Analyzing Multi-Head Self-Attention: Specialized Heads Do the Heavy Lifting, the Rest Can Be Pruned},
  author = {Elena Voita and David Talbot and Fedor Moiseev and Rico Sennrich and Ivan Titov},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {5797--5808},
  year = {2019}
}

@inproceedings{DBLP:journals/corr/LinFSYXZB17,
  title = {A Structured Self-Attentive Sentence Embedding},
  author = {Zhouhan Lin and Minwei Feng and C{\'{\i}}cero Nogueira dos Santos and Mo Yu and Bing Xiang and Bowen Zhou and Yoshua Bengio},
  publisher = {International Conference on Learning Representations},
  year = {2017}
}
@inproceedings{Shaw2018SelfAttentionWR,
  title = {Self-Attention with Relative Position Representations},
  author = {Peter Shaw and Jakob Uszkoreit and Ashish Vaswani},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  pages = {464--468},
  year = {2018}
}
@inproceedings{DBLP:journals/corr/HeZRS15,
  title = {Deep Residual Learning for Image Recognition},
  author = {Kaiming He and Xiangyu Zhang and Shaoqing Ren and Jian Sun},
  publisher = {IEEE Conference on Computer Vision and Pattern Recognition},
  pages = {770--778},
  year = {2016}
}
@article{JMLR:v15:srivastava14a,
  author  = {Nitish Srivastava and Geoffrey Hinton and Alex Krizhevsky and Ilya Sutskever and Ruslan Salakhutdinov},
  title   = {Dropout: A Simple Way to Prevent Neural Networks from Overfitting},
  journal = {Journal of Machine Learning Research},
  volume  = {15},
  pages   = {1929--1958},
  year    = {2014}
}
@inproceedings{Szegedy_2016_CVPR,
  title = {Rethinking the Inception Architecture for Computer Vision},
  author = {Christian Szegedy and Vincent Vanhoucke and Sergey Ioffe and Jonathon Shlens and Zbigniew Wojna},
  publisher = {IEEE Conference on Computer Vision and Pattern Recognition},
  pages = {2818--2826},
  year = {2016}
}
@inproceedings{DBLP:journals/corr/abs-1805-00631,
  title = {Accelerating Neural Transformer via an Average Attention Network},
  author = {Biao Zhang and Deyi Xiong and Jinsong Su},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {1789--1798},
  year = {2018}
}
@article{DBLP:journals/corr/CourbariauxB16,
  title = {BinaryNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1},
  author = {Matthieu Courbariaux and Yoshua Bengio},
  journal = {CoRR},
  volume = {abs/1602.02830},
  year = {2016}
}
@inproceedings{Wu2019PayLA,
  title = {Pay Less Attention with Lightweight and Dynamic Convolutions},
  author = {Felix Wu and Angela Fan and Alexei Baevski and Yann N. Dauphin and Michael Auli},
  publisher = {International Conference on Learning Representations},
  year = {2019}
}

@inproceedings{dai-etal-2019-transformer,
  title = {Transformer-XL: Attentive Language Models beyond a Fixed-Length Context},
  author = {Zihang Dai and Zhilin Yang and Yiming Yang and Jaime G. Carbonell and Quoc Viet Le and Ruslan Salakhutdinov},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {2978--2988},
  year = {2019}
}
@article{Liu2020LearningTE,
  author  = {Xuanqing Liu and Hsiang-Fu Yu and Inderjit Dhillon and Cho-Jui Hsieh},
  title   = {Learning to Encode Position for Transformer with Continuous Dynamical Model},
  journal = {ArXiv},
  volume  = {abs/2003.09229},
  year    = {2020}
}
@inproceedings{Jawahar2019WhatDB,
  author    = {Ganesh Jawahar and Beno{\^{\i}}t Sagot and Djam{\'e} Seddah},
  title     = {What Does BERT Learn about the Structure of Language?},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2019}
}
@inproceedings{Yang2018ModelingLF,
  title = {Modeling Localness for Self-Attention Networks},
  author = {Baosong Yang and Zhaopeng Tu and Derek F. Wong and Fandong Meng and Lidia S. Chao and Tong Zhang},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages = {4449--4458},
  year = {2018}
}
@inproceedings{DBLP:journals/corr/abs-1904-03107,
  title = {Convolutional Self-Attention Networks},
  author = {Baosong Yang and Longyue Wang and Derek F. Wong and Lidia S. Chao and Zhaopeng Tu},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  pages = {4040--4045},
  year = {2019}
}
@article{Wang2018MultilayerRF,
  author  = {Qiang Wang and Fuxue Li and Tong Xiao and Yanyang Li and Yinqiao Li and Jingbo Zhu},
  title   = {Multi-layer Representation Fusion for Neural Machine Translation},
  journal = {ArXiv},
  volume  = {abs/2002.06714},
  year    = {2018}
}
@inproceedings{Bapna2018TrainingDN,
  title = {Training Deeper Neural Machine Translation Models with Transparent Attention},
  author = {Ankur Bapna and Mia Xu Chen and Orhan Firat and Yuan Cao and Yonghui Wu},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages = {3028--3033},
  year = {2018}
}
@inproceedings{Dou2018ExploitingDR,
  title = {Exploiting Deep Representations for Neural Machine Translation},
  author = {Zi-Yi Dou and Zhaopeng Tu and Xing Wang and Shuming Shi and Tong Zhang},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages = {4253--4262},
  year = {2018}
}
@inproceedings{Wang2019ExploitingSC,
  author    = {Xing Wang and Zhaopeng Tu and Longyue Wang and Shuming Shi},
  title     = {Exploiting Sentential Context for Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2019}
}

@inproceedings{Dou2019DynamicLA,
  title = {Dynamic Layer Aggregation for Neural Machine Translation with Routing-by-Agreement},
  author = {Zi-Yi Dou and Zhaopeng Tu and Xing Wang and Longyue Wang and Shuming Shi and Tong Zhang},
  publisher = {AAAI Conference on Artificial Intelligence},
  pages = {86--93},
  year = {2019}
}
@inproceedings{Wei2020MultiscaleCD,
  author    = {Xiangpeng Wei and Heng Yu and Yue Hu and Yue Zhang and Rongxiang Weng and Weihua Luo},
  title     = {Multiscale Collaborative Deep Models for Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2020}
}

@inproceedings{Vaswani2018Tensor2TensorFN,
  title = {Tensor2Tensor for Neural Machine Translation},
  author = {Ashish Vaswani and Samy Bengio and Eugene Brevdo and Fran{\c{c}}ois Chollet and Aidan N. Gomez and Stephan Gouws and Llion Jones and Lukasz Kaiser and Nal Kalchbrenner and Niki Parmar and Ryan Sepassi and Noam Shazeer and Jakob Uszkoreit},
  publisher = {Association for Machine Translation in the Americas},
  pages = {193--199},
  year = {2018}
}

@inproceedings{Kitaev2020ReformerTE,
  author    = {Nikita Kitaev and
               Lukasz Kaiser and
               Anselm Levskaya},
  title     = {Reformer: The Efficient Transformer},
  publisher = {International Conference on Learning Representations},
  year      = {2020}
}

@article{Lin2020WeightDT,
  author  = {Ye Lin and Yanyang Li and Ziyang Wang and Bei Li and Quan Du and Tong Xiao and Jingbo Zhu},
  title   = {Weight Distillation: Transferring the Knowledge in Neural Network Parameters},
  journal = {ArXiv},
  volume  = {abs/2009.09152},
  year    = {2020}
}

@inproceedings{li2020shallow,
  title={Shallow-to-Deep Training for Neural Machine Translation},
  author={Li, Bei and Wang, Ziyang and Liu, Hui and Jiang, Yufan and Du, Quan and Xiao, Tong and Wang, Huizhen and Zhu, Jingbo},
  publisher={Conference on Empirical Methods in Natural Language Processing},
  year={2020}
}
%%%%% chapter 12------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% chapter 13------------------------------------------------------

@inproceedings{garcia-martinez2016factored,
  author    = {Mercedes {Garcia-Martinez} and Loïc {Barrault} and Fethi {Bougares}},
  title     = {Factored Neural Machine Translation Architectures},
  booktitle = {International Workshop on Spoken Language Translation (IWSLT'16)},
  year      = {2016},
  notes     = {Sourced from Microsoft Academic - https://academic.microsoft.com/paper/2949810612}
}

@inproceedings{DBLP:conf/acl/Kudo18,
  title = {Subword Regularization: Improving Neural Network Translation Models with Multiple Subword Candidates},
  author = {Taku Kudo},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {66--75},
  year = {2018}
}

@article{DBLP:journals/jmlr/RaffelSRLNMZLL20,
  author    = {Colin Raffel and
               Noam Shazeer and
               Adam Roberts and
               Katherine Lee and
               Sharan Narang and
               Michael Matena and
               Yanqi Zhou and
               Wei Li and
               Peter J. Liu},
  title     = {Exploring the Limits of Transfer Learning with a Unified Text-to-Text
               Transformer},
  journal   = {Journal of Machine Learning Research},
  volume    = {21},
  pages     = {140:1--140:67},
  year      = {2020}
}

@inproceedings{DBLP:conf/icassp/SchusterN12,
  title = {Japanese and Korean voice search},
  author = {Mike Schuster and Kaisuke Nakajima},
  publisher = {IEEE International Conference on Acoustics, Speech and Signal Processing},
  pages = {5149--5152},
  year = {2012}
}

%%%%% chapter 13------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% chapter 14------------------------------------------------------
@inproceedings{Koehn2007Moses,
  title = {Moses: Open Source Toolkit for Statistical Machine Translation},
  author = {Philipp Koehn and Hieu Hoang and Alexandra Birch and Chris Callison-Burch and Marcello Federico and Nicola Bertoldi and Brooke Cowan and Wade Shen and Christine Moran and Richard Zens and Chris Dyer and Ondrej Bojar and Alexandra Constantin and Evan Herbst},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2007}
}

@inproceedings{DBLP:conf/amta/Koehn04,
  author    = {Philipp Koehn},
  title     = {Pharaoh: {A} Beam Search Decoder for Phrase-Based Statistical Machine
               Translation Models},
  volume    = {3265},
  pages     = {115--124},
  publisher = {Association for Machine Translation in the Americas},
  year      = {2004}
}

@inproceedings{DBLP:conf/emnlp/StahlbergHSB17,
  title = {SGNMT - A Flexible NMT Decoding Platform for Quick Prototyping of New Models and Search Strategies},
  author = {Felix Stahlberg and Eva Hasler and Danielle Saunders and Bill Byrne},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages = {25--30},
  year = {2017}
}

@inproceedings{Liu2016AgreementOT,
  author    = {Lemao Liu and
               Masao Utiyama and
               Andrew M. Finch and
               Eiichiro Sumita},
  title     = {Agreement on Target-bidirectional Neural Machine Translation},
  pages     = {411--416},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  year      = {2016}
}

@inproceedings{DBLP:conf/wmt/LiLXLLLWZXWFCLL19,
  title = {The NiuTrans Machine Translation Systems for {WMT19}},
  author = {Bei Li and Yinqiao Li and Chen Xu and Ye Lin and Jiqiang Liu and Hui Liu and Ziyang Wang and Yuhao Zhang and Nuo Xu and Zeyang Wang and Kai Feng and Hexuan Chen and Tengbo Liu and Yanyang Li and Qiang Wang and Tong Xiao and Jingbo Zhu},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {257--266},
  year = {2019}
}

@inproceedings{DBLP:conf/wmt/SennrichHB16,
  title = {Edinburgh Neural Machine Translation Systems for {WMT} 16},
  author = {Rico Sennrich and Barry Haddow and Alexandra Birch},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {371--376},
  year = {2016}
}

@inproceedings{Stahlberg2018TheUO,
  title={The University of Cambridge's Machine Translation Systems for WMT18},
  author={Felix Stahlberg and
               Adri{\`{a}} de Gispert and
               Bill Byrne},
  pages     = {504--512},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2018}
}

@inproceedings{DBLP:conf/aaai/ZhangSQLJW18,
  author    = {Xiangwen Zhang and
               Jinsong Su and
               Yue Qin and
               Yang Liu and
               Rongrong Ji and
               Hongji Wang},
  title     = {Asynchronous Bidirectional Decoding for Neural Machine Translation},
  pages     = {5698--5705},
  publisher = {AAAI Conference on Artificial Intelligence},
  year      = {2018}
}

@inproceedings{Li2017EnhancedNM,
  title={Enhanced neural machine translation by learning from draft},
  author={Aodong Li and
               Shiyue Zhang and
               Dong Wang and
               Thomas Fang Zheng},
  publisher={IEEE Asia-Pacific Services Computing Conference},
  year={2017},
  pages={1583--1587}
}

@inproceedings{ElMaghraby2018EnhancingTF,
  author    = {Ayah ElMaghraby and Ahmed Rafea},
  title     = {Enhancing Translation from English to Arabic Using Two-Phase Decoder Translation},
  publisher = {Intelligent Systems and Applications},
  pages     = {539--549},
  year      = {2018}
}

@inproceedings{Geng2018AdaptiveMD,
  author    = {Xinwei Geng and Xiaocheng Feng and Bing Qin and Ting Liu},
  title     = {Adaptive Multi-pass Decoder for Neural Machine Translation},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {523--532},
  year      = {2018}
}

@inproceedings{Lee2018DeterministicNN,
  title={Deterministic Non-Autoregressive Neural Sequence Modeling by Iterative Refinement},
  author={Jason Lee and Elman Mansimov and Kyunghyun Cho},
  pages     = {1173--1182},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2018}
}

@inproceedings{Gu2019LevenshteinT,
  author    = {Jiatao Gu and Changhan Wang and Jake Zhao},
  title     = {Levenshtein Transformer},
  publisher = {Conference and Workshop on Neural Information Processing Systems},
  pages     = {11179--11189},
  year      = {2019}
}

@inproceedings{Guo2020JointlyMS,
  author    = {Junliang Guo and Linli Xu and Enhong Chen},
  title     = {Jointly Masked Sequence-to-Sequence Model for Non-Autoregressive Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {376--385},
  year      = {2020}
}

@inproceedings{Stahlberg2018AnOS,
  title={An Operation Sequence Model for Explainable Neural Machine Translation},
  author={Felix Stahlberg and Danielle Saunders and Bill Byrne},
  pages     = {175--186},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2018}
}

@inproceedings{Stern2019InsertionTF,
  author    = {Mitchell Stern and William Chan and Jamie Kiros and Jakob Uszkoreit},
  title     = {Insertion Transformer: Flexible Sequence Generation via Insertion Operations},
  publisher = {International Conference on Machine Learning},
  pages     = {5976--5985},
  year      = {2019}
}

@article{stling2017NeuralMT,
  author  = {Robert {\"O}stling and J{\"{o}}rg Tiedemann},
  title   = {Neural machine translation for low-resource languages},
  journal = {CoRR},
  volume  = {abs/1708.05729},
  year    = {2017}
}

@inproceedings{Kikuchi2016ControllingOL,
  title={Controlling Output Length in Neural Encoder-Decoders},
  author={Yuta Kikuchi and
               Graham Neubig and
               Ryohei Sasano and
               Hiroya Takamura and
               Manabu Okumura},
  pages     = {1328--1338},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2016}
}

@inproceedings{Takase2019PositionalET,
  author    = {Sho Takase and Naoaki Okazaki},
  title     = {Positional Encoding to Control Output Sequence Length},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  pages     = {3999--4004},
  year      = {2019}
}

@inproceedings{Murray2018CorrectingLB,
  author    = {Kenton Murray and David Chiang},
  title     = {Correcting Length Bias in Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {212--223},
  year      = {2018}
}

@inproceedings{Sountsov2016LengthBI,
  title={Length bias in Encoder Decoder Models and a Case for Global Conditioning},
  author={Pavel Sountsov and Sunita Sarawagi},
  pages     = {1516--1525},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2016}
}

@inproceedings{Jean2015MontrealNM,
  author    = {S{\'{e}}bastien Jean and Orhan Firat and Kyunghyun Cho and Roland Memisevic and Yoshua Bengio},
  title     = {Montreal Neural Machine Translation Systems for WMT'15},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {134--140},
  year      = {2015}
}

@inproceedings{Yang2018OtemUtemOA,
  author    = {Jing Yang and Biao Zhang and Yue Qin and Xiangwen Zhang and Qian Lin and Jinsong Su},
  title     = {Otem{\&}Utem: Over- and Under-Translation Evaluation Metric for NMT},
  publisher = {CCF International Conference on Natural Language Processing and Chinese Computing},
  pages     = {291--302},
  year      = {2018}
}

@inproceedings{Mi2016CoverageEM,
  author    = {Haitao Mi and Baskaran Sankaran and Zhiguo Wang and Abe Ittycheriah},
  title     = {Coverage Embedding Models for Neural Machine Translation},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {955--960},
  year      = {2016}
}

@inproceedings{DBLP:conf/emnlp/HuangZM17,
  author    = {Liang Huang and
               Kai Zhao and
               Mingbo Ma},
  title     = {When to Finish? Optimal Beam Search for Neural Text Generation (modulo
               beam size)},
  pages     = {2134--2139},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2017}
}

@inproceedings{Wiseman2016SequencetoSequenceLA,
  author    = {Sam Wiseman and Alexander M. Rush},
  title     = {Sequence-to-Sequence Learning as Beam-Search Optimization},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {1296--1306},
  year      = {2016}
}

@inproceedings{DBLP:conf/emnlp/Yang0M18,
  author    = {Yilin Yang and
               Liang Huang and
               Mingbo Ma},
  title     = {Breaking the Beam Search Curse: {A} Study of (Re-)Scoring Methods
               and Stopping Criteria for Neural Machine Translation},
  pages     = {3054--3059},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2018}
}

@inproceedings{Ma2019LearningTS,
  title={Learning to Stop in Structured Prediction for Neural Machine Translation},
  author={Mingbo Ma and
               Renjie Zheng and
               Liang Huang},
  pages     = {1884--1889},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  year      = {2019}
}

@inproceedings{KleinOpenNMT,
  title = {OpenNMT: Open-Source Toolkit for Neural Machine Translation},
  author = {Guillaume Klein and Yoon Kim and Yuntian Deng and Jean Senellart and Alexander M. Rush},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {67--72},
  year = {2017}
}

@inproceedings{bahdanau2014neural,
  title = {Neural Machine Translation by Jointly Learning to Align and Translate},
  author = {Dzmitry Bahdanau and Kyunghyun Cho and Yoshua Bengio},
  publisher = {International Conference on Learning Representations},
  year = {2015}
}

@inproceedings{Jiang2012LearnedPF,
  author    = {Jiarong Jiang and Adam R. Teichert and Hal Daum{\'e} and Jason Eisner},
  title     = {Learned Prioritization for Trading Off Accuracy and Speed},
  publisher = {Conference and Workshop on Neural Information Processing Systems},
  pages     = {1340--1348},
  year      = {2012}
}

@inproceedings{Zheng2020OpportunisticDW,
  author    = {Renjie Zheng and Mingbo Ma and Baigong Zheng and Kaibo Liu and Liang Huang},
  title     = {Opportunistic Decoding with Timely Correction for Simultaneous Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {437--442},
  year      = {2020}
}

@inproceedings{Ma2019STACLST,
  author    = {Mingbo Ma and Liang Huang and Hao Xiong and Renjie Zheng and Kaibo Liu and Baigong Zheng and Chuanqiang Zhang and Zhongjun He and Hairong Liu and Xing Li and Hua Wu and Haifeng Wang},
  title     = {STACL: Simultaneous Translation with Implicit Anticipation and Controllable Latency using Prefix-to-Prefix Framework},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {3025--3036},
  year      = {2019}
}

@inproceedings{Gimpel2013ASE,
  author    = {Kevin Gimpel and Dhruv Batra and Chris Dyer and Gregory Shakhnarovich},
  title     = {A Systematic Exploration of Diversity in Machine Translation},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {1100--1111},
  year      = {2013}
}

@article{Li2016MutualIA,
  author  = {Jiwei Li and Dan Jurafsky},
  title   = {Mutual Information and Diverse Decoding Improve Neural Machine Translation},
  journal = {CoRR},
  volume  = {abs/1601.00372},
  year    = {2016}
}

@inproceedings{Li2016ADO,
  author    = {Jiwei Li and Michel Galley and Chris Brockett and Jianfeng Gao and Bill Dolan},
  title     = {A Diversity-Promoting Objective Function for Neural Conversation Models},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  pages     = {110--119},
  year      = {2016}
}

@inproceedings{He2018SequenceTS,
  author    = {Xuanli He and Gholamreza Haffari and Mohammad Norouzi},
  title     = {Sequence to Sequence Mixture Model for Diverse Machine Translation},
  publisher = {International Conference on Computational Linguistics},
  pages     = {583--592},
  year      = {2018}
}

@inproceedings{Shen2019MixtureMF,
  title={Mixture Models for Diverse Machine Translation: Tricks of the Trade},
  author={Tianxiao Shen and Myle Ott and Michael Auli and Marc'Aurelio Ranzato},
  pages     = {5719--5728},
  publisher = {International Conference on Machine Learning},
  year      = {2019},
}

@inproceedings{Wu2020GeneratingDT,
  title={Generating Diverse Translation from Model Distribution with Dropout},
  author={Xuanfu Wu and Yang Feng and Chenze Shao},
  pages={1088--1097},
  publisher={Annual Meeting of the Association for Computational Linguistics},
  year={2020}
}

@inproceedings{Sun2020GeneratingDT,
  author    = {Zewei Sun and Shujian Huang and Hao Ran Wei and Xin Yu Dai and Jiajun Chen},
  title     = {Generating Diverse Translation by Manipulating Multi-Head Attention},
  publisher = {AAAI Conference on Artificial Intelligence},
  pages     = {8976--8983},
  year      = {2020}
}

@article{Vijayakumar2016DiverseBS,
  author  = {Ashwin K. Vijayakumar and Michael Cogswell and Ramprasaath R. Selvaraju and Qing Sun and Stefan Lee and David J. Crandall and Dhruv Batra},
  title   = {Diverse Beam Search: Decoding Diverse Solutions from Neural Sequence Models},
  journal = {CoRR},
  volume  = {abs/1610.02424},
  year    = {2016}
}

@inproceedings{Liu2014SearchAwareTF,
  author    = {Lemao Liu and Liang Huang},
  title     = {Search-Aware Tuning for Machine Translation},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {1942--1952},
  year      = {2014}
}

@inproceedings{Yu2013MaxViolationPA,
  author    = {Heng Yu and Liang Huang and Haitao Mi and Kai Zhao},
  title     = {Max-Violation Perceptron and Forced Decoding for Scalable MT Training},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {1112--1123},
  year      = {2013}
}

@inproceedings{Stahlberg2019OnNS,
  author    = {Felix Stahlberg and Bill Byrne},
  title     = {On NMT Search Errors and Model Errors: Cat Got Your Tongue?},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {3354--3360},
  year      = {2019}
}

@inproceedings{Niehues2017AnalyzingNM,
  author    = {Jan Niehues and Eunah Cho and Thanh-Le Ha and Alex Waibel},
  title     = {Analyzing Neural MT Search and Model Performance},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {11--17},
  year      = {2017}
}

@article{StahlbergNeural,
  title={Neural Machine Translation: A Review},
  author={Felix Stahlberg},
  journal={Journal of Artificial Intelligence Research},
  year={2020},
  volume={69},
  pages={343--418}
}

@inproceedings{Ranzato2016SequenceLT,
  title={Sequence Level Training with Recurrent Neural Networks},
  author={Marc'Aurelio Ranzato and
               Sumit Chopra and
               Michael Auli and
               Wojciech Zaremba},
  publisher={International Conference on Learning Representations},
  year={2016}
}

@inproceedings{Bengio2015ScheduledSF,
  title={Scheduled Sampling for Sequence Prediction with Recurrent Neural Networks},
  author={Samy Bengio and
               Oriol Vinyals and
               Navdeep Jaitly and
               Noam Shazeer},
  publisher = {Conference and Workshop on Neural Information Processing Systems},
  pages     = {1171--1179},
  year      = {2015}
}

@inproceedings{Zhang2019BridgingTG,
  title={Bridging the Gap between Training and Inference for Neural Machine Translation},
  author={Wen Zhang and Yang Feng and Fandong Meng and Di You and Qun Liu},
  pages     = {4334--4343},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2019}
}

@inproceedings{DBLP:conf/acl/ShenCHHWSL16,
  title = {Minimum Risk Training for Neural Machine Translation},
  author = {Shiqi Shen and Yong Cheng and Zhongjun He and Wei He and Hua Wu and Maosong Sun and Yang Liu},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2016}
}

@inproceedings{DBLP:conf/acl/SennrichHB16a,
  title = {Neural Machine Translation of Rare Words with Subword Units},
  author = {Rico Sennrich and Barry Haddow and Alexandra Birch},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2016}
}

@inproceedings{DBLP:conf/emnlp/ZensSX12,
  author    = {Richard Zens and
               Daisy Stanton and
               Peng Xu},
  title     = {A Systematic Comparison of Phrase Table Pruning Techniques},
  pages     = {972--983},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2012}
}

@inproceedings{DBLP:conf/emnlp/JohnsonMFK07,
  author    = {Howard Johnson and
               Joel D. Martin and
               George F. Foster and
               Roland Kuhn},
  title     = {Improving Translation Quality by Discarding Most of the Phrasetable},
  pages     = {967--975},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2007}
}

@inproceedings{DBLP:conf/emnlp/LingGTB12,
  author    = {Wang Ling and
               Jo{\~{a}}o Gra{\c{c}}a and
               Isabel Trancoso and
               Alan W. Black},
  title     = {Entropy-based Pruning for Phrase-based Machine Translation},
  pages     = {962--971},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2012}
}

@article{Narang2017BlockSparseRN,
  author  = {Sharan Narang and Eric Undersander and Gregory Diamos},
  title   = {Block-Sparse Recurrent Neural Networks},
  journal = {CoRR},
  volume  = {abs/1711.02782},
  year    = {2017}
}

@article{Gale2019TheSO,
  author    = {Gale, Trevor and
               Elsen, Erich and
               Hooker, Sara},
  title     = {The State of Sparsity in Deep Neural Networks},
  journal   = {CoRR},
  volume    = {abs/1902.09574},
  year      = {2019}
}

@inproceedings{Michel2019AreSH,
  author    = {Michel, Paul and
               Levy, Omer and
               Neubig, Graham},
  title     = {Are Sixteen Heads Really Better than One?},
  publisher = {Conference and Workshop on Neural Information Processing Systems},
  pages     = {14014--14024},
  year      = {2019}
}

@inproceedings{DBLP:journals/corr/abs-1905-09418,
  author    = {Voita, Elena and
               Talbot, David and
               Moiseev, Fedor and
               Sennrich, Rico and
               Titov, Ivan},
  title     = {Analyzing Multi-Head Self-Attention: Specialized Heads Do the Heavy Lifting, the Rest Can Be Pruned},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {5797--5808},
  year      = {2019}
}

@inproceedings{Kitaev2020ReformerTE,
  author    = {Kitaev, Nikita and
               Kaiser, Lukasz and
               Levskaya, Anselm},
  title     = {Reformer: The Efficient Transformer},
  publisher = {International Conference on Learning Representations},
  year      = {2020}
}

@article{Katharopoulos2020TransformersAR,
  author    = {Katharopoulos, Angelos and
               Vyas, Apoorv and
               Pappas, Nikolaos and
               Fleuret, Fran{\c{c}}ois},
  title     = {Transformers are RNNs: Fast Autoregressive Transformers with Linear Attention},
  journal   = {CoRR},
  volume    = {abs/2006.16236},
  year      = {2020}
}

@article{xiao2011language,
  author    = {Xiao, Tong and
               Zhu, Jingbo and
               Zhu, Muhua},
  title     = {Language Modeling for Syntax-Based Machine Translation Using Tree Substitution Grammars: A Case Study on Chinese-English Translation},
  journal   = {ACM Transactions on Asian Language Information Processing},
  volume    = {10},
  number    = {4},
  pages     = {1--29},
  year      = {2011}
}

@inproceedings{Li2009VariationalDF,
  author    = {Li, Zhifei and
               Eisner, Jason and
               Khudanpur, Sanjeev},
  title     = {Variational Decoding for Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {593--601},
  year      = {2009}
}

@article{Bastings2019ModelingLS,
  author    = {Bastings, Jasmijn and
               Aziz, Wilker and
               Titov, Ivan and
               Sima'an, Khalil},
  title     = {Modeling Latent Sentence Structure in Neural Machine Translation},
  journal   = {CoRR},
  volume    = {abs/1901.06436},
  year      = {2019}
}

@inproceedings{Shah2018GenerativeNM,
  author    = {Shah, Harshil and
               Barber, David},
  title     = {Generative Neural Machine Translation},
  publisher = {Conference and Workshop on Neural Information Processing Systems},
  pages     = {1353--1362},
  year      = {2018}
}

@inproceedings{Su2018VariationalRN,
  author    = {Su, Jinsong and
               Wu, Shan and
               Xiong, Deyi and
               Lu, Yaojie and
               Han, Xianpei and
               Zhang, Biao},
  title     = {Variational Recurrent Neural Machine Translation},
  publisher = {AAAI Conference on Artificial Intelligence},
  pages     = {5488--5495},
  year      = {2018}
}

@inproceedings{DBLP:journals/corr/GehringAGYD17,
  author    = {Gehring, Jonas and
               Auli, Michael and
               Grangier, David and
               Yarats, Denis and
               Dauphin, Yann N.},
  title     = {Convolutional Sequence to Sequence Learning},
  publisher = {International Conference on Machine Learning},
  volume    = {70},
  pages     = {1243--1252},
  year      = {2017}
}

@inproceedings{Wei2019ImitationLF,
  author    = {Wei, Bingzhen and
               Wang, Mingxuan and
               Zhou, Hao and
               Lin, Junyang and
               Sun, Xu},
  title     = {Imitation Learning for Non-Autoregressive Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {1304--1312},
  year      = {2019}
}

@inproceedings{Shao2019RetrievingSI,
  author    = {Shao, Chenze and
               Feng, Yang and
               Zhang, Jinchao and
               Meng, Fandong and
               Chen, Xilin and
               Zhou, Jie},
  title     = {Retrieving Sequential Information for Non-Autoregressive Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {3013--3024},
  year      = {2019}
}

@inproceedings{Akoury2019SyntacticallyST,
  author    = {Akoury, Nader and
               Krishna, Kalpesh and
               Iyyer, Mohit},
  title     = {Syntactically Supervised Transformers for Faster Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {1269--1281},
  year      = {2019}
}

@inproceedings{Guo2020FineTuningBC,
  author    = {Guo, Junliang and
               Tan, Xu and
               Xu, Linli and
               Qin, Tao and
               Chen, Enhong and
               Liu, Tie-Yan},
  title     = {Fine-Tuning by Curriculum Learning for Non-Autoregressive Neural Machine Translation},
  publisher = {AAAI Conference on Artificial Intelligence},
  pages     = {7839--7846},
  year      = {2020}
}

@inproceedings{Ran2020LearningTR,
  author    = {Ran, Qiu and
               Lin, Yankai and
               Li, Peng and
               Zhou, Jie},
  title     = {Learning to Recover from Multi-Modality Errors for Non-Autoregressive Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {3059--3069},
  year      = {2020}
}

@inproceedings{Liu2020FastBERTAS,
  author    = {Liu, Weijie and
               Zhou, Peng and
               Wang, Zhiruo and
               Zhao, Zhe and
               Deng, Haotang and
               Ju, Qi},
  title     = {FastBERT: a Self-distilling BERT with Adaptive Inference Time},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {6035--6044},
  year      = {2020}
}


@inproceedings{Elbayad2020DepthAdaptiveT,
  author    = {Elbayad, Maha and
               Gu, Jiatao and
               Grave, Edouard and
               Auli, Michael},
  title     = {Depth-Adaptive Transformer},
  publisher = {International Conference on Learning Representations},
  year      = {2020}
}

@inproceedings{Lan2020ALBERTAL,
  author    = {Lan, Zhenzhong and
               Chen, Mingda and
               Goodman, Sebastian and
               Gimpel, Kevin and
               Sharma, Piyush and
               Soricut, Radu},
  title     = {ALBERT: A Lite BERT for Self-supervised Learning of Language Representations},
  publisher = {International Conference on Learning Representations},
  year      = {2020}
}

@inproceedings{Han2015LearningBW,
  author    = {Han, Song and
               Pool, Jeff and
               Tran, John and
               Dally, William J.},
  title     = {Learning both Weights and Connections for Efficient Neural Network},
  publisher = {Conference and Workshop on Neural Information Processing Systems},
  pages     = {1135--1143},
  year      = {2015}
}

@inproceedings{Lee2019SNIPSN,
  author    = {Lee, Namhoon and
               Ajanthan, Thalaiyasingam and
               Torr, Philip H. S.},
  title     = {{SNIP}: Single-shot Network Pruning based on Connection Sensitivity},
  publisher = {International Conference on Learning Representations},
  year      = {2019}
}

@inproceedings{Frankle2019TheLT,
  author    = {Frankle, Jonathan and
               Carbin, Michael},
  title     = {The Lottery Ticket Hypothesis: Finding Sparse, Trainable Neural Networks},
  publisher = {International Conference on Learning Representations},
  year      = {2019}
}

@inproceedings{Brix2020SuccessfullyAT,
  author    = {Brix, Christopher and
               Bahar, Parnia and
               Ney, Hermann},
  title     = {Successfully Applying the Stabilized Lottery Ticket Hypothesis to the Transformer Architecture},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {3909--3915},
  year      = {2020}
}

@article{Liu2019RethinkingTV,
  author    = {Liu, Zhuang and
               Sun, Mingjie and
               Zhou, Tinghui and
               Huang, Gao and
               Darrell, Trevor},
  title     = {Rethinking the Value of Network Pruning},
  journal   = {CoRR},
  volume    = {abs/1810.05270},
  year      = {2019}
}

@inproceedings{Liu2017LearningEC,
  author    = {Liu, Zhuang and
               Li, Jianguo and
               Shen, Zhiqiang and
               Huang, Gao and
               Yan, Shoumeng and
               Zhang, Changshui},
  title     = {Learning Efficient Convolutional Networks through Network Slimming},
  publisher = {{IEEE} International Conference on Computer Vision},
  pages     = {2755--2763},
  year      = {2017}
}

@inproceedings{Banner2018ScalableMF,
  author    = {Banner, Ron and
               Hubara, Itay and
               Hoffer, Elad and
               Soudry, Daniel},
  title     = {Scalable Methods for 8-bit Training of Neural Networks},
  publisher = {Conference on Neural Information Processing Systems},
  pages     = {5151--5159},
  year      = {2018}
}

@article{Hubara2017QuantizedNN,
  author    = {Hubara, Itay and
               Courbariaux, Matthieu and
               Soudry, Daniel and
               El-Yaniv, Ran and
               Bengio, Yoshua},
  title     = {Quantized Neural Networks: Training Neural Networks with Low Precision Weights and Activations},
  journal   = {Journal of Machine Learning Research},
  volume    = {18},
  pages     = {187:1--187:30},
  year      = {2017}
}

@article{DBLP:journals/corr/HintonVD15,
  author    = {Hinton, Geoffrey E. and
               Vinyals, Oriol and
               Dean, Jeffrey},
  title     = {Distilling the Knowledge in a Neural Network},
  journal   = {CoRR},
  volume    = {abs/1503.02531},
  year      = {2015}
}

@inproceedings{Munim2019SequencelevelKD,
  author    = {Mun'im, Raden Mu'az and
               Inoue, Nakamasa and
               Shinoda, Koichi},
  title     = {Sequence-level Knowledge Distillation for Model Compression of Attention-based Sequence-to-sequence Speech Recognition},
  publisher = {{IEEE} International Conference on Acoustics, Speech and Signal Processing},
  pages     = {6151--6155},
  year      = {2019}
}

@article{Tang2019DistillingTK,
  author    = {Tang, Raphael and
               Lu, Yao and
               Liu, Linqing and
               Mou, Lili and
               Vechtomova, Olga and
               Lin, Jimmy},
  title     = {Distilling Task-Specific Knowledge from {BERT} into Simple Neural Networks},
  journal   = {CoRR},
  volume    = {abs/1903.12136},
  year      = {2019}
}

@inproceedings{Jiao2020TinyBERTDB,
  author    = {Jiao, Xiaoqi and
               Yin, Yichun and
               Shang, Lifeng and
               Jiang, Xin and
               Chen, Xiao and
               Li, Linlin and
               Wang, Fang and
               Liu, Qun},
  title     = {TinyBERT: Distilling {BERT} for Natural Language Understanding},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {4163--4174},
  year      = {2020}
}

@article{Ghazvininejad2020AlignedCE,
  author    = {Ghazvininejad, Marjan and
               Karpukhin, Vladimir and
               Zettlemoyer, Luke and
               Levy, Omer},
  title     = {Aligned Cross Entropy for Non-Autoregressive Machine Translation},
  journal   = {CoRR},
  volume    = {abs/2004.01655},
  year      = {2020}
}

@inproceedings{Shao2020MinimizingTB,
  author    = {Shao, Chenze and
               Zhang, Jinchao and
               Feng, Yang and
               Meng, Fandong and
               Zhou, Jie},
  title     = {Minimizing the Bag-of-Ngrams Difference for Non-Autoregressive Neural Machine Translation},
  publisher = {AAAI Conference on Artificial Intelligence},
  pages     = {198--205},
  year      = {2020}
}

@inproceedings{Ma2019FlowSeqNC,
  author    = {Ma, Xuezhe and
               Zhou, Chunting and
               Li, Xian and
               Neubig, Graham and
               Hovy, Eduard H.},
  title     = {FlowSeq: Non-Autoregressive Conditional Sequence Generation with Generative Flow},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {4281--4291},
  year      = {2019}
}

@inproceedings{Guo2019NonAutoregressiveNM,
  author    = {Guo, Junliang and
               Tan, Xu and
               He, Di and
               Qin, Tao and
               Xu, Linli and
               Liu, Tie-Yan},
  title     = {Non-Autoregressive Neural Machine Translation with Enhanced Decoder Input},
  publisher = {AAAI Conference on Artificial Intelligence},
  pages     = {3723--3730},
  year      = {2019}
}

@article{Ran2019GuidingNN,
  author    = {Ran, Qiu and
               Lin, Yankai and
               Li, Peng and
               Zhou, Jie},
  title     = {Guiding Non-Autoregressive Neural Machine Translation Decoding with Reordering Information},
  journal   = {CoRR},
  volume    = {abs/1911.02215},
  year      = {2019}
}

@inproceedings{vaswani2017attention,
  author    = {{Vaswani}, Ashish and
               {Shazeer}, Noam and
               {Parmar}, Niki and
               {Uszkoreit}, Jakob and
               {Jones}, Llion and
               {Gomez}, Aidan N. and
               {Kaiser}, Lukasz and
               {Polosukhin}, Illia},
  title     = {Attention is All You Need},
  publisher = {Conference and Workshop on Neural Information Processing Systems},
  pages     = {5998--6008},
  year      = {2017}
}

@inproceedings{Gu2017NonAutoregressiveNM,
  author    = {Gu, Jiatao and
               Bradbury, James and
               Xiong, Caiming and
               Li, Victor O. K. and
               Socher, Richard},
  title     = {Non-Autoregressive Neural Machine Translation},
  publisher = {International Conference on Learning Representations},
  year      = {2018}
}

@article{Zhou2020UnderstandingKD,
  author    = {Zhou, Chunting and
               Neubig, Graham and
               Gu, Jiatao},
  title     = {Understanding Knowledge Distillation in Non-autoregressive Machine Translation},
  journal   = {CoRR},
  volume    = {abs/1911.02727},
  year      = {2020}
}

@inproceedings{Wang2019NonAutoregressiveMT,
  author    = {Wang, Yiren and
               Tian, Fei and
               He, Di and
               Qin, Tao and
               Zhai, ChengXiang and
               Liu, Tie-Yan},
  title     = {Non-Autoregressive Machine Translation with Auxiliary Regularization},
  publisher = {AAAI Conference on Artificial Intelligence},
  pages     = {5377--5384},
  year      = {2019}
}

@inproceedings{Kaiser2018FastDI,
  author    = {Kaiser, {\L}ukasz and
               Roy, Aurko and
               Vaswani, Ashish and
               Parmar, Niki and
               Bengio, Samy and
               Uszkoreit, Jakob and
               Shazeer, Noam},
  title     = {Fast Decoding in Sequence Models using Discrete Latent Variables},
  publisher = {International Conference on Machine Learning},
  pages     = {2395--2404},
  year      = {2018}
}

@inproceedings{Tu2020ENGINEEI,
  author    = {Tu, Lifu and
               Pang, Richard Yuanzhe and
               Wiseman, Sam and
               Gimpel, Kevin},
  title     = {ENGINE: Energy-Based Inference Networks for Non-Autoregressive Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {2819--2826},
  year      = {2020}
}

@inproceedings{Shu2020LatentVariableNN,
  author    = {Shu, Raphael and
               Lee, Jason and
               Nakayama, Hideki and
               Cho, Kyunghyun},
  title     = {Latent-Variable Non-Autoregressive Neural Machine Translation with Deterministic Inference using a Delta Posterior},
  publisher = {AAAI Conference on Artificial Intelligence},
  pages     = {8846--8853},
  year      = {2020}
}

@inproceedings{Li2019HintBasedTF,
  author    = {Li, Zhuohan and
               Lin, Zi and
               He, Di and
               Tian, Fei and
               Qin, Tao and
               Wang, Liwei and
               Liu, Tie-Yan},
  title     = {Hint-Based Training for Non-Autoregressive Machine Translation},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {5707--5712},
  year      = {2019}
}

@inproceedings{Ho2016ModelFreeIL,
  author    = {Ho, Jonathan and
               Gupta, Jayesh K. and
               Ermon, Stefano},
  title     = {Model-Free Imitation Learning with Policy Optimization},
  publisher = {International Conference on Machine Learning},
  pages     = {2760--2769},
  year      = {2016}
}

@inproceedings{Ho2016GenerativeAI,
  author    = {Ho, Jonathan and
               Ermon, Stefano},
  title     = {Generative Adversarial Imitation Learning},
  publisher = {Conference and Workshop on Neural Information Processing Systems},
  pages     = {4565--4573},
  year      = {2016}
}

@article{Duan2017OneShotIL,
  author    = {Duan, Yan and
               Andrychowicz, Marcin and
               Stadie, Bradly C. and
               Ho, Jonathan and
               Schneider, Jonas and
               Sutskever, Ilya and
               Abbeel, Pieter and
               Zaremba, Wojciech},
  title     = {One-Shot Imitation Learning},
  journal   = {CoRR},
  volume    = {abs/1703.07326},
  year      = {2017}
}

@inproceedings{Wang2018SemiAutoregressiveNM,
  author    = {Wang, Chunqi and
               Zhang, Ji and
               Chen, Haiqing},
  title     = {Semi-Autoregressive Neural Machine Translation},
  booktitle = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {479--488},
  year      = {2018}
}

@inproceedings{Ghazvininejad2019MaskPredictPD,
  author    = {Ghazvininejad, Marjan and
               Levy, Omer and
               Liu, Yinhan and
               Zettlemoyer, Luke},
  title     = {Mask-Predict: Parallel Decoding of Conditional Masked Language Models},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {6111--6120},
  year      = {2019}
}

@article{Kasai2020NonAutoregressiveMT,
  author    = {Kasai, Jungo and
               Cross, James and
               Ghazvininejad, Marjan and
               Gu, Jiatao},
  title     = {Non-Autoregressive Machine Translation with Disentangled Context Transformer},
  journal   = {CoRR},
  volume    = {abs/2001.05136},
  year      = {2020}
}

@article{Zhou2019SynchronousBN,
  author    = {Zhou, Long and
               Zhang, Jiajun and
               Zong, Chengqing},
  title     = {Synchronous Bidirectional Neural Machine Translation},
  journal   = {Transactions of the Association for Computational Linguistics},
  volume    = {7},
  pages     = {91--105},
  year      = {2019}
}

@inproceedings{devlin2019bert,
  author    = {Devlin, Jacob and
               Chang, Ming-Wei and
               Lee, Kenton and
               Toutanova, Kristina},
  title     = {{BERT}: Pre-training of Deep Bidirectional Transformers for Language Understanding},
  publisher = {Proceedings of the Human Language Technology Conference of the North American Chapter of the Association for Computational Linguistics},
  pages     = {4171--4186},
  year      = {2019}
}

@inproceedings{Feng2016ImprovingAM,
  author    = {Feng, Shi and
               Liu, Shujie and
               Yang, Nan and
               Li, Mu and
               Zhou, Ming and
               Zhu, Kenny Q.},
  title     = {Improving Attention Modeling with Implicit Distortion and Fertility for Machine Translation},
  booktitle = {International Conference on Computational Linguistics},
  pages     = {3082--3092},
  year      = {2016}
}

@inproceedings{TuModeling,
  author    = {Tu, Zhaopeng and
               Lu, Zhengdong and
               Liu, Yang and
               Liu, Xiaohua and
               Li, Hang},
  title     = {Modeling Coverage for Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2016}
}

@article{Wu2016GooglesNM,
  author    = {Wu, Yonghui and
               Schuster, Mike and
               Chen, Zhifeng and
               Le, Quoc V. and
               Norouzi, Mohammad and
               Macherey, Wolfgang and
               Krikun, Maxim and
               Cao, Yuan and
               Gao, Qin and
               Macherey, Klaus and
               Klingner, Jeff and
               Shah, Apurva and
               Johnson, Melvin and
               Liu, Xiaobing and
               Kaiser, Lukasz and
               Gouws, Stephan and
               Kato, Yoshikiyo and
               Kudo, Taku and
               Kazawa, Hideto and
               Stevens, Keith and
               Kurian, George and
               Patil, Nishant and
               Wang, Wei and
               Young, Cliff and
               Smith, Jason and
               Riesa, Jason and
               Rudnick, Alex and
               Vinyals, Oriol and
               Corrado, Greg and
               Hughes, Macduff and
               Dean, Jeffrey},
  title     = {Google's Neural Machine Translation System: Bridging the Gap between Human and Machine Translation},
  journal   = {CoRR},
  volume    = {abs/1609.08144},
  year      = {2016}
}

@inproceedings{li-etal-2018-simple,
  author    = {Li, Yanyang and
               Xiao, Tong and
               Li, Yinqiao and
               Wang, Qiang and
               Xu, Changming and
               Zhu, Jingbo},
  title     = {A Simple and Effective Approach to Coverage-Aware Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {292--297},
  year      = {2018}
}

@article{Peris2017InteractiveNM,
  author    = {Peris, {\'A}lvaro and
               Domingo, Miguel and
               Casacuberta, Francisco},
  title     = {Interactive neural machine translation},
  journal   = {Computer Speech and Language},
  volume    = {45},
  pages     = {201--220},
  year      = {2017}
}

@inproceedings{Peris2018ActiveLF,
  author    = {Peris, {\'A}lvaro and
               Casacuberta, Francisco},
  title     = {Active Learning for Interactive Neural Machine Translation of Data Streams},
  publisher = {The SIGNLL Conference on Computational Natural Language Learning},
  pages     = {151--160},
  year      = {2018}
}

@article{Xiao2016ALA,
  author    = {Xiao, Tong and
               Wong, Derek F. and
               Zhu, Jingbo},
  title     = {A Loss-Augmented Approach to Training Syntactic Machine Translation Systems},
  journal   = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
  volume    = {24},
  pages     = {2069--2083},
  year      = {2016}
}

@inproceedings{DBLP:conf/acl/JeanCMB15,
  author    = {Jean, S{\'{e}}bastien and
               Cho, KyungHyun and
               Memisevic, Roland and
               Bengio, Yoshua},
  title     = {On Using Very Large Target Vocabulary for Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {1--10},
  year      = {2015}
}

@article{61115,
  author    = {Lin, Jianhua},
  title     = {Divergence measures based on the Shannon entropy},
  journal   = {IEEE Transactions on Information Theory},
  volume    = {37},
  number    = {1},
  pages     = {145--151},
  year      = {1991}
}

@inproceedings{DBLP:conf/aaai/DabreF19,
  author    = {Dabre, Raj and
               Fujita, Atsushi},
  title     = {Recurrent Stacking of Layers for Compact Neural Machine Translation Models},
  publisher = {AAAI Conference on Artificial Intelligence},
  pages     = {6292--6299},
  year      = {2019}
}

@inproceedings{DBLP:journals/corr/abs-1805-00631,
  author    = {Zhang, Biao and
               Xiong, Deyi and
               Su, Jinsong},
  title     = {Accelerating Neural Transformer via an Average Attention Network},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {1789--1798},
  year      = {2018}
}

@inproceedings{Wu2019PayLA,
  author    = {Wu, Felix and
               Fan, Angela and
               Baevski, Alexei and
               Dauphin, Yann N. and
               Auli, Michael},
  title     = {Pay Less Attention with Lightweight and Dynamic Convolutions},
  publisher = {International Conference on Learning Representations},
  year      = {2019}
}

@inproceedings{Xiao2019SharingAW,
  author    = {Xiao, Tong and
               Li, Yinqiao and
               Zhu, Jingbo and
               Yu, Zhengtao and
               Liu, Tongran},
  title     = {Sharing Attention Weights for Fast Transformer},
  publisher = {International Joint Conference on Artificial Intelligence},
  pages     = {5292--5298},
  year      = {2019}
}

@inproceedings{Chen2018TheBO,
  author    = {Chen, Mia Xu and
               Firat, Orhan and
               Bapna, Ankur and
               Johnson, Melvin and
               Macherey, Wolfgang and
               Foster, George F. and
               Jones, Llion and
               Schuster, Mike and
               Shazeer, Noam and
               Parmar, Niki and
               Vaswani, Ashish and
               Uszkoreit, Jakob and
               Kaiser, Lukasz and
               Chen, Zhifeng and
               Wu, Yonghui and
               Hughes, Macduff},
  title     = {The Best of Both Worlds: Combining Recent Advances in Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {76--86},
  year      = {2018}
}

@article{DBLP:journals/corr/abs-1906-00532,
  author    = {Bhandare, Aishwarya and
               Sripathi, Vamsi and
               Karkada, Deepthi and
               Menon, Vivek and
               Choi, Sun and
               Datta, Kushal and
               Saletore, Vikram},
  title     = {Efficient 8-Bit Quantization of Transformer Neural Machine Language Translation Model},
  journal   = {CoRR},
  volume    = {abs/1906.00532},
  year      = {2019}
}


@inproceedings{DBLP:conf/cvpr/JacobKCZTHAK18,
  author    = {Jacob, Benoit and
               Kligys, Skirmantas and
               Chen, Bo and
               Zhu, Menglong and
               Tang, Matthew and
               Howard, Andrew G. and
               Adam, Hartwig and
               Kalenichenko, Dmitry},
  title     = {Quantization and Training of Neural Networks for Efficient Integer-Arithmetic-Only Inference},
  publisher = {{IEEE} Conference on Computer Vision and Pattern Recognition},
  pages     = {2704--2713},
  year      = {2018}
}

@article{DBLP:journals/corr/abs-1910-10485,
  author    = {Prato, Gabriele and
               Charlaix, Ella and
               Rezagholizadeh, Mehdi},
  title     = {Fully Quantized Transformer for Improved Translation},
  journal   = {CoRR},
  volume    = {abs/1910.10485},
  year      = {2019}
}

@inproceedings{DBLP:conf/nips/HubaraCSEB16,
  author    = {Hubara, Itay and
               Courbariaux, Matthieu and
               Soudry, Daniel and
               El-Yaniv, Ran and
               Bengio, Yoshua},
  title     = {Binarized Neural Networks},
  publisher = {Conference and Workshop on Neural Information Processing Systems},
  pages     = {4107--4115},
  year      = {2016}
}

@article{DBLP:journals/jcss/FreundS97,
  author    = {Freund, Yoav and
               Schapire, Robert E.},
  title     = {A Decision-Theoretic Generalization of On-Line Learning and an Application to Boosting},
  journal   = {Journal of Computer and System Sciences},
  volume    = {55},
  number    = {1},
  pages     = {119--139},
  year      = {1997}
}

@inproceedings{DBLP:conf/acl/XiaoZZW10,
  author    = {Xiao, Tong and
               Zhu, Jingbo and
               Zhu, Muhua and
               Wang, Huizhen},
  title     = {Boosting-Based System Combination for Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {739--748},
  year      = {2010}
}

@inproceedings{DBLP:conf/icassp/SimBGSW07,
  author    = {Sim, Khe Chai and
               Byrne, William J. and
               Gales, Mark J. F. and
               Sahbi, Hichem and
               Woodland, Philip C.},
  title     = {Consensus Network Decoding for Statistical Machine Translation System Combination},
  publisher = {Proceedings of the {IEEE} International Conference on Acoustics, Speech, and Signal Processing},
  pages     = {105--108},
  year      = {2007}
}

@inproceedings{DBLP:conf/acl/RostiMS07,
  author    = {Rosti, Antti-Veikko I. and
               Matsoukas, Spyridon and
               Schwartz, Richard M.},
  title     = {Improved Word-Level System Combination for Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2007}
}

@inproceedings{DBLP:conf/wmt/RostiZMS08,
  author    = {Rosti, Antti-Veikko I. and
               Zhang, Bing and
               Matsoukas, Spyros and
               Schwartz, Richard M.},
  title     = {Incremental Hypothesis Alignment for Building Confusion Networks with Application to Machine Translation System Combination},
  publisher = {Proceedings of the Third Workshop on Statistical Machine Translation},
  pages     = {183--186},
  year      = {2008}
}

@inproceedings{DBLP:conf/emnlp/DuanLXZ09,
  author    = {Duan, Nan and
               Li, Mu and
               Xiao, Tong and
               Zhou, Ming},
  title     = {The Feature Subspace Method for SMT System Combination},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {1096--1104},
  year      = {2009}
}

@article{DBLP:journals/corr/LiMJ16,
  author    = {Li, Jiwei and
               Monroe, Will and
               Jurafsky, Dan},
  title     = {A Simple, Fast Diverse Decoding Algorithm for Neural Generation},
  journal   = {CoRR},
  volume    = {abs/1611.08562},
  year      = {2016}
}

@article{xiao2013bagging,
  author    = {Xiao, Tong and
               Zhu, Jingbo and
               Liu, Tongran},
  title     = {Bagging and boosting statistical machine translation systems},
  journal   = {Artificial Intelligence},
  volume    = {195},
  pages     = {496--527},
  year      = {2013}
}

@inproceedings{DBLP:conf/emnlp/TrombleKOM08,
  author    = {Tromble, Roy and
               Kumar, Shankar and
               Och, Franz Josef and
               Macherey, Wolfgang},
  title     = {Lattice Minimum Bayes-Risk Decoding for Statistical Machine Translation},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {620--629},
  year      = {2008}
}

@inproceedings{DBLP:conf/aaai/SuTXJSL17,
  author    = {Su, Jinsong and
               Tan, Zhixing and
               Xiong, Deyi and
               Ji, Rongrong and
               Shi, Xiaodong and
               Liu, Yang},
  title     = {Lattice-Based Recurrent Neural Network Encoders for Neural Machine Translation},
  publisher = {AAAI Conference on Artificial Intelligence},
  pages     = {3302--3308},
  year      = {2017}
}

@inproceedings{Shaw2018SelfAttentionWR,
  author    = {Shaw, Peter and
               Uszkoreit, Jakob and
               Vaswani, Ashish},
  title     = {Self-Attention with Relative Position Representations},
  publisher = {Proceedings of the Human Language Technology Conference of the North American Chapter of the Association for Computational Linguistics},
  pages     = {464--468},
  year      = {2018}
}

@inproceedings{WangLearning,
  author    = {Wang, Qiang and
               Li, Bei and
               Xiao, Tong and
               Zhu, Jingbo and
               Li, Changliang and
               Wong, Derek F. and
               Chao, Lidia S.},
  title     = {Learning Deep Transformer Models for Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {1810--1822},
  year      = {2019}
}

@inproceedings{DBLP:conf/iclr/FanGJ20,
  author    = {Fan, Angela and
               Grave, Edouard and
               Joulin, Armand},
  title     = {Reducing Transformer Depth on Demand with Structured Dropout},
  publisher = {International Conference on Learning Representations},
  year      = {2020}
}

@inproceedings{DBLP:conf/emnlp/WangXZ20,
  author    = {Wang, Qiang and
               Xiao, Tong and
               Zhu, Jingbo},
  title     = {Training Flexible Depth Model by Multi-Task Learning for Neural Machine Translation},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {4307--4312},
  year      = {2020}
}

@inproceedings{DBLP:journals/corr/abs-2002-02925,
  author    = {Xu, Canwen and
               Zhou, Wangchunshu and
               Ge, Tao and
               Wei, Furu and
               Zhou, Ming},
  title     = {BERT-of-Theseus: Compressing {BERT} by Progressive Module Replacing},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2020}
}

@inproceedings{DBLP:conf/iclr/BaevskiA19,
  author    = {Baevski, Alexei and
               Auli, Michael},
  title     = {Adaptive Input Representations for Neural Language Modeling},
  publisher = {International Conference on Learning Representations},
  year      = {2019}
}

@article{DBLP:journals/corr/abs-2006-04768,
  author    = {Wang, Sinong and
               Li, Belinda Z. and
               Khabsa, Madian and
               Fang, Han and
               Ma, Hao},
  title     = {Linformer: Self-Attention with Linear Complexity},
  journal   = {CoRR},
  volume    = {abs/2006.04768},
  year      = {2020}
}

@article{DBLP:journals/corr/abs-1911-12385,
  author    = {Mehta, Sachin and
               Koncel-Kedziorski, Rik and
               Rastegari, Mohammad and
               Hajishirzi, Hannaneh},
  title     = {DeFINE: DEep Factorized INput Word Embeddings for Neural Sequence Modeling},
  journal   = {CoRR},
  volume    = {abs/1911.12385},
  year      = {2019}
}

% Ma et al. (2019): a tensorized Transformer for language modeling (CoRR preprint).
@article{DBLP:journals/corr/abs-1906-09777,
    title = {A Tensorized Transformer for Language Modeling},
    author = {Xindian Ma and Peng Zhang and Shuai Zhang and Nan Duan and
              Yuexian Hou and Dawei Song and Ming Zhou},
    journal = {CoRR},
    volume = {abs/1906.09777},
    year = {2019},
}

% Yang et al. (2019): Mixtape, an efficient way to break the softmax bottleneck.
@inproceedings{DBLP:conf/nips/YangLSL19,
    title = {Mixtape: Breaking the Softmax Bottleneck Efficiently},
    author = {Zhilin Yang and Thang Luong and Ruslan Salakhutdinov and Quoc V. Le},
    booktitle = {Conference on Neural Information Processing Systems},
    pages = {15922--15930},
    year = {2019},
}

% Kasai et al. (2020): deep-encoder/shallow-decoder speed-quality tradeoff in MT (CoRR preprint).
@article{DBLP:journals/corr/abs-2006-10369,
    title = {Deep Encoder, Shallow Decoder: Reevaluating the Speed-Quality Tradeoff in Machine Translation},
    author = {Jungo Kasai and Nikolaos Pappas and Hao Peng and James Cross and Noah A. Smith},
    journal = {CoRR},
    volume = {abs/2006.10369},
    year = {2020},
}

% Hu et al. (2020): the NiuTrans submission to the WNGT 2020 efficiency task.
@inproceedings{DBLP:conf/aclnmt/HuLLLLWXZ20,
    title = {The NiuTrans System for WNGT 2020 Efficiency Task},
    author = {Chi Hu and Bei Li and Yinqiao Li and Ye Lin and
              Yanyang Li and Chenglong Wang and Tong Xiao and Jingbo Zhu},
    publisher = {Annual Meeting of the Association for Computational Linguistics},
    pages = {204--210},
    year = {2020},
}

% Hsu et al. (2020): efficient inference for neural machine translation (CoRR preprint).
@article{DBLP:journals/corr/abs-2010-02416,
    title = {Efficient Inference For Neural Machine Translation},
    author = {Yi-Te Hsu and Sarthak Garg and Yi-Hsiu Liao and Ilya Chatsviorkin},
    journal = {CoRR},
    volume = {abs/2010.02416},
    year = {2020},
}

% Vaswani et al. (2018): Tensor2Tensor for neural machine translation.
@inproceedings{Vaswani2018Tensor2TensorFN,
    title = {Tensor2Tensor for Neural Machine Translation},
    author = {Ashish Vaswani and Samy Bengio and Eugene Brevdo and
              Fran{\c{c}}ois Chollet and Aidan N. Gomez and Stephan Gouws and
              Llion Jones and Lukasz Kaiser and Nal Kalchbrenner and
              Niki Parmar and Ryan Sepassi and Noam Shazeer and
              Jakob Uszkoreit},
    publisher = {Association for Machine Translation in the Americas},
    pages = {193--199},
    year = {2018},
}

% Sun et al. (2019): Baidu's neural machine translation systems for WMT19.
@inproceedings{Sun2019BaiduNM,
    author = {Meng Sun and Bojian Jiang and Hao Xiong and
              Zhongjun He and Hua Wu and Haifeng Wang},
    title = {Baidu Neural Machine Translation Systems for WMT19},
    publisher = {Annual Meeting of the Association for Computational Linguistics},
    pages = {374--381},
    year = {2019},
}

% Wang et al. (2018): Tencent's neural machine translation systems for WMT18.
@inproceedings{Wang2018TencentNM,
    author = {Mingxuan Wang and Li Gong and Wenhuan Zhu and Jun Xie and Chao Bian},
    title = {Tencent Neural Machine Translation Systems for WMT18},
    publisher = {Annual Meeting of the Association for Computational Linguistics},
    pages = {522--527},
    year = {2018},
}

% Bi et al. (2019): multi-agent learning for neural machine translation.
% Recorded as arXiv preprint 1909.01101 in the CoRR journal/volume form used by the
% other preprint entries in this file; the article type needs a journal field.
@article{Bi2019MultiagentLF,
    author = {Tianchi Bi and Hao Xiong and Zhongjun He and Hua Wu and Haifeng Wang},
    title = {Multi-agent Learning for Neural Machine Translation},
    journal = {CoRR},
    volume = {abs/1909.01101},
    year = {2019},
}

% Koehn and Knowles (2017): six challenges for neural machine translation.
@inproceedings{DBLP:conf/aclnmt/KoehnK17,
    title = {Six Challenges for Neural Machine Translation},
    author = {Philipp Koehn and Rebecca Knowles},
    publisher = {Annual Meeting of the Association for Computational Linguistics},
    pages = {28--39},
    year = {2017},
}


% Held and Sabanes Bove (2014): Springer textbook on applied statistical inference.
% Entered as a book. The volume/number/pages values that used to be here
% (10, 978-3, 16) were dropped as non-bibliographic data.
% NOTE(review): they look like fragments of the book's DOI -- confirm.
@book{Held2013AppliedSI,
    author = {Held, Leonhard and Saban{\'e}s Bov{\'e}, Daniel},
    title = {Applied Statistical Inference},
    publisher = {Springer},
    year = {2014},
}



% Zhang et al. (2016): variational neural machine translation.
% Venue is EMNLP 2016, written with the EMNLP name string used elsewhere in this file.
@inproceedings{Zhang2016VariationalNM,
    author = {Biao Zhang and Deyi Xiong and Jinsong Su and Hong Duan and Min Zhang},
    title = {Variational Neural Machine Translation},
    publisher = {Conference on Empirical Methods in Natural Language Processing},
    pages = {521--530},
    year = {2016},
}

% Silvey: Statistical Inference.
% NOTE(review): the venue and year recorded here look like an auto-export artifact
% for this title -- verify against the source that is actually being cited.
@inproceedings{Silvey2018StatisticalI,
    author = {S. D. Silvey},
    title = {Statistical Inference},
    publisher = {Encyclopedia of Social Network Analysis and Mining},
    year = {2018},
}

% Cheong and Daniel (2019): compressing Transformers with pruning and quantization.
% Title punctuation normalised (no space before the colon).
@inproceedings{Cheong2019transformersZ,
    author = {Robin Cheong and Robel Daniel},
    title = {transformers.zip: Compressing Transformers with Pruning and Quantization},
    publisher = {Stanford University},
    year = {2019},
}

% Beal (2003): variational algorithms for approximate Bayesian inference.
% A doctoral thesis, so it uses the phdthesis type with the university in the school field.
@phdthesis{Beal2003VariationalAF,
    author = {Matthew J. Beal},
    title = {Variational algorithms for approximate {Bayesian} inference},
    school = {University College London},
    year = {2003},
}

% Gage (1994): a new algorithm for data compression.
% Journal name cleaned of the trailing export word "archive", issue number added,
% and the page range written with a double hyphen.
@article{Gage1994ANA,
    author = {Gage, Philip},
    title = {A new algorithm for data compression},
    journal = {The C Users Journal},
    volume = {12},
    number = {2},
    pages = {23--38},
    year = {1994},
}

% Eisner and Daume (2011): learning speed-accuracy tradeoffs in nondeterministic inference.
@inproceedings{Eisner2011LearningST,
    author = {J. Eisner and Hal Daum{\'e}},
    title = {Learning Speed-Accuracy Tradeoffs in Nondeterministic Inference Algorithms},
    publisher = {Conference and Workshop on Neural Information Processing Systems},
    year = {2011},
}

% Kazimi and Costa-jussa (2017): coverage for character-based neural machine translation.
% Volume 59 and pages 99--106 belong to the journal publication, so the journal is
% named here; the work is also available as arXiv preprint 1810.02340.
@article{Kazimi2017CoverageFC,
    author = {M. Kazimi and Marta R. Costa-juss{\`a}},
    title = {Coverage for Character Based Neural Machine Translation},
    journal = {Procesamiento del Lenguaje Natural},
    volume = {59},
    pages = {99--106},
    year = {2017},
}
%%%%% chapter 14------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% chapter 15------------------------------------------------------

% Yu, Yu and Ramalingam (2018): learning strict identity mappings in deep residual networks.
@inproceedings{DBLP:conf/cvpr/YuYR18,
    title = {Learning Strict Identity Mappings in Deep Residual Networks},
    author = {Xin Yu and Zhiding Yu and Srikumar Ramalingam},
    publisher = {{IEEE} Conference on Computer Vision and Pattern Recognition},
    pages = {4432--4440},
    year = {2018},
}

% Zhang, Titov and Sennrich (2019): depth-scaled initialization and merged attention
% for deep Transformers.
% Venue is EMNLP (as the citation key indicates), not the ACL annual meeting.
@inproceedings{DBLP:conf/emnlp/ZhangTS19,
    title = {Improving Deep Transformer with Depth-Scaled Initialization and Merged Attention},
    author = {Biao Zhang and Ivan Titov and Rico Sennrich},
    publisher = {Conference on Empirical Methods in Natural Language Processing},
    pages = {898--909},
    year = {2019},
}

% He et al. (2016): identity mappings in deep residual networks.
@inproceedings{DBLP:conf/eccv/HeZRS16,
    title = {Identity Mappings in Deep Residual Networks},
    author = {Kaiming He and Xiangyu Zhang and Shaoqing Ren and Jian Sun},
    publisher = {European Conference on Computer Vision},
    volume = {9908},
    pages = {630--645},
    year = {2016},
}

% Ott et al. (2019): fairseq, a toolkit for sequence modeling.
@inproceedings{Ottfairseq,
    title = {fairseq: {A} Fast, Extensible Toolkit for Sequence Modeling},
    author = {Myle Ott and Sergey Edunov and Alexei Baevski and Angela Fan and
              Sam Gross and Nathan Ng and David Grangier and Michael Auli},
    publisher = {Annual Meeting of the Association for Computational Linguistics},
    pages = {48--53},
    year = {2019},
}

% Klein et al. (2017): OpenNMT, an open-source toolkit for neural machine translation.
@inproceedings{KleinOpenNMT,
    title = {OpenNMT: Open-Source Toolkit for Neural Machine Translation},
    author = {Guillaume Klein and Yoon Kim and Yuntian Deng and
              Jean Senellart and Alexander M. Rush},
    publisher = {Annual Meeting of the Association for Computational Linguistics},
    pages = {67--72},
    year = {2017},
}

% Wu et al. (2019): depth growing for neural machine translation.
@inproceedings{DBLP:conf/acl/WuWXTGQLL19,
    title = {Depth Growing for Neural Machine Translation},
    author = {Lijun Wu and Yiren Wang and Yingce Xia and Fei Tian and
              Fei Gao and Tao Qin and Jianhuang Lai and Tie-Yan Liu},
    publisher = {Annual Meeting of the Association for Computational Linguistics},
    pages = {5558--5563},
    year = {2019},
}

% Huang et al. (2017): densely connected convolutional networks.
@inproceedings{DBLP:conf/cvpr/HuangLMW17,
    title = {Densely Connected Convolutional Networks},
    author = {Gao Huang and Zhuang Liu and Laurens van der Maaten and Kilian Q. Weinberger},
    publisher = {{IEEE} Conference on Computer Vision and Pattern Recognition},
    pages = {2261--2269},
    year = {2017},
}

% Greff, Srivastava and Schmidhuber (2017): highway and residual networks as unrolled
% iterative estimation.
% Typed as inproceedings: the only venue recorded is a conference, and the entry has
% no journal field, which the article type requires.
@inproceedings{DBLP:journals/corr/GreffSS16,
    title = {Highway and Residual Networks learn Unrolled Iterative Estimation},
    author = {Klaus Greff and Rupesh Kumar Srivastava and J{\"{u}}rgen Schmidhuber},
    publisher = {International Conference on Learning Representations},
    year = {2017},
}

% Bapna et al. (2018): training deeper NMT models with transparent attention.
% Venue is EMNLP 2018, written with the EMNLP name string used elsewhere in this file.
@inproceedings{Bapna2018TrainingDN,
    title = {Training Deeper Neural Machine Translation Models with Transparent Attention},
    author = {Ankur Bapna and Mia Xu Chen and Orhan Firat and Yuan Cao and Yonghui Wu},
    publisher = {Conference on Empirical Methods in Natural Language Processing},
    pages = {3028--3033},
    year = {2018},
}

% Wang et al. (2019): learning deep Transformer models for machine translation.
@inproceedings{WangLearning,
    title = {Learning Deep Transformer Models for Machine Translation},
    author = {Qiang Wang and Bei Li and Tong Xiao and Jingbo Zhu and
              Changliang Li and Derek F. Wong and Lidia S. Chao},
    publisher = {Annual Meeting of the Association for Computational Linguistics},
    pages = {1810--1822},
    year = {2019},
}

% Xiong et al. (2020): layer normalization in the Transformer architecture (CoRR preprint).
@article{DBLP:journals/corr/abs-2002-04745,
    title = {On Layer Normalization in the Transformer Architecture},
    author = {Ruibin Xiong and Yunchang Yang and Di He and Kai Zheng and
              Shuxin Zheng and Chen Xing and Huishuai Zhang and Yanyan Lan and
              Liwei Wang and Tie-Yan Liu},
    journal = {CoRR},
    volume = {abs/2002.04745},
    year = {2020},
}

% Liu et al. (2020): understanding the difficulty of training Transformers.
% Venue is EMNLP (as the citation key indicates), not the ACL annual meeting.
@inproceedings{DBLP:conf/emnlp/LiuLGCH20,
    title = {Understanding the Difficulty of Training Transformers},
    author = {Liyuan Liu and Xiaodong Liu and Jianfeng Gao and Weizhu Chen and Jiawei Han},
    publisher = {Conference on Empirical Methods in Natural Language Processing},
    pages = {5747--5763},
    year = {2020},
}

% He et al. (2016): deep residual learning for image recognition.
@inproceedings{DBLP:journals/corr/HeZRS15,
    title = {Deep Residual Learning for Image Recognition},
    author = {Kaiming He and Xiangyu Zhang and Shaoqing Ren and Jian Sun},
    publisher = {IEEE Conference on Computer Vision and Pattern Recognition},
    pages = {770--778},
    year = {2016}
}

% Ba, Kiros and Hinton (2016): layer normalization (CoRR preprint).
@article{Ba2016LayerN,
    title = {Layer Normalization},
    author = {Lei Jimmy Ba and Jamie Ryan Kiros and Geoffrey E. Hinton},
    journal = {CoRR},
    volume = {abs/1607.06450},
    year = {2016},
}

% Vaswani2018Tensor2TensorFN (Tensor2Tensor for Neural Machine Translation) is already
% defined in the chapter 14 section of this file. The identical second definition that
% stood here was removed, because BibTeX reports a repeated citation key as an error.

% Dou et al. (2019): dynamic layer aggregation for NMT with routing-by-agreement.
@inproceedings{Dou2019DynamicLA,
    title = {Dynamic Layer Aggregation for Neural Machine Translation with Routing-by-Agreement},
    author = {Zi-Yi Dou and Zhaopeng Tu and Xing Wang and
              Longyue Wang and Shuming Shi and Tong Zhang},
    publisher = {AAAI Conference on Artificial Intelligence},
    pages = {86--93},
    year = {2019},
}

% Wang et al. (2018): multi-layer representation fusion for neural machine translation.
% Preprint venue written as CoRR, the spelling used by the other arXiv entries in this file.
@article{Wang2018MultilayerRF,
    author = {Qiang Wang and Fuxue Li and Tong Xiao and
              Yanyang Li and Yinqiao Li and Jingbo Zhu},
    title = {Multi-layer Representation Fusion for Neural Machine Translation},
    journal = {CoRR},
    volume = {abs/2002.06714},
    year = {2018},
}

% Dou et al. (2018): exploiting deep representations for neural machine translation.
% Venue is EMNLP 2018, written with the EMNLP name string used elsewhere in this file.
@inproceedings{Dou2018ExploitingDR,
    title = {Exploiting Deep Representations for Neural Machine Translation},
    author = {Zi-Yi Dou and Zhaopeng Tu and Xing Wang and Shuming Shi and Tong Zhang},
    publisher = {Conference on Empirical Methods in Natural Language Processing},
    pages = {4253--4262},
    year = {2018},
}

% Lin et al. (2017): a structured self-attentive sentence embedding.
@inproceedings{DBLP:journals/corr/LinFSYXZB17,
    title = {A Structured Self-Attentive Sentence Embedding},
    author = {Zhouhan Lin and Minwei Feng and C{\'{\i}}cero Nogueira dos Santos and
              Mo Yu and Bing Xiang and Bowen Zhou and Yoshua Bengio},
    publisher = {International Conference on Learning Representations},
    year = {2017}
}

% Srivastava, Greff and Schmidhuber (2015): training very deep networks.
@inproceedings{DBLP:conf/nips/SrivastavaGS15,
    title = {Training Very Deep Networks},
    author = {Rupesh Kumar Srivastava and Klaus Greff and J{\"{u}}rgen Schmidhuber},
    publisher = {Conference on Neural Information Processing Systems},
    pages = {2377--2385},
    year = {2015},
}

% Balduzzi et al. (2017): the shattered gradients problem.
@inproceedings{DBLP:conf/icml/BalduzziFLLMM17,
    title = {The Shattered Gradients Problem: If resnets are the answer, then what is the question?},
    author = {David Balduzzi and Marcus Frean and Lennox Leary and
              J. P. Lewis and Kurt Wan-Duo Ma and Brian McWilliams},
    publisher = {International Conference on Machine Learning},
    volume = {70},
    pages = {342--350},
    year = {2017},
}

% Allen-Zhu, Li and Song (2019): convergence theory for deep learning via over-parameterization.
@inproceedings{DBLP:conf/icml/Allen-ZhuLS19,
    title = {A Convergence Theory for Deep Learning via Over-Parameterization},
    author = {Zeyuan Allen-Zhu and Yuanzhi Li and Zhao Song},
    publisher = {International Conference on Machine Learning},
    volume = {97},
    pages = {242--252},
    year = {2019},
}

% Du et al. (2019): gradient descent finds global minima of deep neural networks.
@inproceedings{DBLP:conf/icml/DuLL0Z19,
    title = {Gradient Descent Finds Global Minima of Deep Neural Networks},
    author = {Simon S. Du and Jason D. Lee and Haochuan Li and Liwei Wang and Xiyu Zhai},
    publisher = {International Conference on Machine Learning},
    volume = {97},
    pages = {1675--1685},
    year = {2019},
}

% Glorot and Bengio (2010): the difficulty of training deep feedforward neural networks.
@inproceedings{pmlr-v9-glorot10a,
    title = {Understanding the difficulty of training deep feedforward neural networks},
    author = {Xavier Glorot and Yoshua Bengio},
    publisher = {International Conference on Artificial Intelligence and Statistics},
    volume = {9},
    pages = {249--256},
    year = {2010},
}

% He et al. (2015): rectifiers and human-level performance on ImageNet classification.
@inproceedings{DBLP:conf/iccv/HeZRS15,
    title = {Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification},
    author = {Kaiming He and Xiangyu Zhang and Shaoqing Ren and Jian Sun},
    publisher = {IEEE International Conference on Computer Vision},
    pages = {1026--1034},
    year = {2015},
}

% Huang et al. (2020): improving Transformer optimization through better initialization.
@inproceedings{huang2020improving,
    author = {Xiao Shi {Huang} and Juan {Perez} and Jimmy {Ba} and Maksims {Volkovs}},
    title = {Improving Transformer Optimization Through Better Initialization},
    publisher = {International Conference on Machine Learning},
    year = {2020},
}

% Zoph and Le (2017): neural architecture search with reinforcement learning.
@inproceedings{DBLP:conf/iclr/ZophL17,
    title = {Neural Architecture Search with Reinforcement Learning},
    author = {Barret Zoph and Quoc V. Le},
    publisher = {International Conference on Learning Representations},
    year = {2017},
}

% Zoph et al. (2018): learning transferable architectures for scalable image recognition.
@inproceedings{DBLP:conf/cvpr/ZophVSL18,
    title = {Learning Transferable Architectures for Scalable Image Recognition},
    author = {Barret Zoph and Vijay Vasudevan and Jonathon Shlens and Quoc V. Le},
    publisher = {IEEE Conference on Computer Vision and Pattern Recognition},
    pages = {8697--8710},
    year = {2018},
}

% Real et al. (2019): aging evolution for image classifier architecture search.
% NOTE(review): same year and venue as DBLP:conf/aaai/RealAHL19, with a near-identical
% title and matching author surnames -- confirm they are distinct works before citing both.
@inproceedings{Real2019AgingEF,
    author = {E. Real and A. Aggarwal and Y. Huang and Quoc V. Le},
    title = {Aging Evolution for Image Classifier Architecture Search},
    booktitle = {AAAI Conference on Artificial Intelligence},
    year = {2019},
}

% So, Le and Liang (2019): the Evolved Transformer.
@inproceedings{DBLP:conf/icml/SoLL19,
    title = {The Evolved Transformer},
    author = {David R. So and Quoc V. Le and Chen Liang},
    publisher = {International Conference on Machine Learning},
    volume = {97},
    pages = {5877--5886},
    year = {2019},
}

% Miller, Todd and Hegde (1989): designing neural networks using genetic algorithms.
@inproceedings{DBLP:conf/icga/MillerTH89,
    title = {Designing Neural Networks using Genetic Algorithms},
    author = {Geoffrey F. Miller and Peter M. Todd and Shailesh U. Hegde},
    publisher = {International Conference on Genetic Algorithms},
    pages = {379--384},
    year = {1989},
}

% Mandischer (1993): representation and evolution of neural networks.
@inproceedings{mandischer1993representation,
    author = {Mandischer, Martin},
    title = {Representation and evolution of neural networks},
    publisher = {Artificial Neural Nets and Genetic Algorithms},
    pages = {643--649},
    year = {1993},
}

% Koza and Rice (1991): genetic generation of both weights and architecture of a neural network.
% Venue name capitalised like every other conference name in this file.
@inproceedings{koza1991genetic,
    author = {Koza, John R and Rice, James P},
    title = {Genetic generation of both the weights and architecture for a neural network},
    publisher = {International Joint Conference on Neural Networks},
    volume = {2},
    pages = {397--404},
    year = {1991},
}

% Dodd (1990): optimisation of network structure using genetic techniques.
% Venue reduced to the conference name, as in the other conference entries of this file
% (it previously also carried the place and dates: San Diego, CA, USA, June 17--21, 1990).
@inproceedings{DBLP:conf/ijcnn/Dodd90,
    title = {Optimisation of network structure using genetic techniques},
    author = {N. Dodd},
    publisher = {International Joint Conference on Neural Networks},
    pages = {965--970},
    year = {1990},
}

% Harp, Samad and Guha (1989): application-specific neural networks via the genetic algorithm.
@inproceedings{DBLP:conf/nips/HarpSG89,
    title = {Designing Application-Specific Neural Networks Using the Genetic Algorithm},
    author = {Steven A. Harp and Tariq Samad and Aloke Guha},
    publisher = {Advances in Neural Information Processing Systems},
    pages = {447--454},
    year = {1989},
}

% Kitano (1990): designing neural networks with genetic algorithms and a graph generation system.
@article{DBLP:journals/compsys/Kitano90,
    title = {Designing Neural Networks Using Genetic Algorithms with Graph Generation System},
    author = {Hiroaki Kitano},
    journal = {Complex Systems},
    volume = {4},
    number = {4},
    year = {1990},
}

% Santos Reyes and Duro (1994): evolutionary generation and training of recurrent neural networks.
@inproceedings{DBLP:conf/icec/SantosD94,
    title = {Evolutionary Generation and Training of Recurrent Artificial Neural Networks},
    author = {Jos{\'{e}} Santos Reyes and Richard J. Duro},
    publisher = {IEEE Conference on Evolutionary Computation},
    pages = {759--763},
    year = {1994},
}

% Luo et al. (2018): neural architecture optimization.
@inproceedings{DBLP:conf/nips/LuoTQCL18,
    title = {Neural Architecture Optimization},
    author = {Renqian Luo and Fei Tian and Tao Qin and Enhong Chen and Tie-Yan Liu},
    publisher = {Advances in Neural Information Processing Systems},
    pages = {7827--7838},
    year = {2018},
}

% Pham et al. (2018): efficient neural architecture search via parameter sharing.
@inproceedings{DBLP:conf/icml/PhamGZLD18,
    title = {Efficient Neural Architecture Search via Parameter Sharing},
    author = {Hieu Pham and Melody Y. Guan and Barret Zoph and Quoc V. Le and Jeff Dean},
    publisher = {International Conference on Machine Learning},
    volume = {80},
    pages = {4092--4101},
    year = {2018},
}

% Liu, Simonyan and Yang (2019): DARTS, differentiable architecture search.
@inproceedings{DBLP:conf/iclr/LiuSY19,
    title = {{DARTS:} Differentiable Architecture Search},
    author = {Hanxiao Liu and Karen Simonyan and Yiming Yang},
    publisher = {International Conference on Learning Representations},
    year = {2019},
}

% Li et al. (2020): learning architectures from an extended search space for language modeling.
@inproceedings{DBLP:conf/acl/LiHZXJXZLL20,
    title = {Learning Architectures from an Extended Search Space for Language Modeling},
    author = {Yinqiao Li and Chi Hu and Yuhao Zhang and Nuo Xu and Yufan Jiang and
              Tong Xiao and Jingbo Zhu and Tongran Liu and Changliang Li},
    publisher = {Annual Meeting of the Association for Computational Linguistics},
    pages = {6629--6639},
    year = {2020},
}

% Jiang et al. (2019): improved differentiable architecture search for language modeling
% and named entity recognition.
% Venue is EMNLP (as the citation key indicates), not the ACL annual meeting.
@inproceedings{DBLP:conf/emnlp/JiangHXZZ19,
    title = {Improved Differentiable Architecture Search for Language Modeling and Named Entity Recognition},
    author = {Yufan Jiang and Chi Hu and Tong Xiao and Chunliang Zhang and Jingbo Zhu},
    publisher = {Conference on Empirical Methods in Natural Language Processing},
    pages = {3583--3588},
    year = {2019},
}

% Real et al. (2019): regularized evolution for image classifier architecture search.
@inproceedings{DBLP:conf/aaai/RealAHL19,
    title = {Regularized Evolution for Image Classifier Architecture Search},
    author = {Esteban Real and Alok Aggarwal and Yanping Huang and Quoc V. Le},
    publisher = {AAAI Conference on Artificial Intelligence},
    pages = {4780--4789},
    year = {2019},
}

% Real et al. (2017): large-scale evolution of image classifiers.
@inproceedings{DBLP:conf/icml/RealMSSSTLK17,
    title = {Large-Scale Evolution of Image Classifiers},
    author = {Esteban Real and Sherry Moore and Andrew Selle and Saurabh Saxena and
              Yutaka Leon Suematsu and Jie Tan and Quoc V. Le and Alexey Kurakin},
    publisher = {International Conference on Machine Learning},
    volume = {70},
    pages = {2902--2911},
    year = {2017},
}

% Elsken, Metzen and Hutter (2019): multi-objective architecture search via Lamarckian evolution.
@inproceedings{DBLP:conf/iclr/ElskenMH19,
    title = {Efficient Multi-Objective Neural Architecture Search via Lamarckian Evolution},
    author = {Thomas Elsken and Jan Hendrik Metzen and Frank Hutter},
    publisher = {International Conference on Learning Representations},
    year = {2019},
}

% Baker et al. (2017): designing neural network architectures using reinforcement learning.
@inproceedings{DBLP:conf/iclr/BakerGNR17,
    title = {Designing Neural Network Architectures using Reinforcement Learning},
    author = {Bowen Baker and Otkrist Gupta and Nikhil Naik and Ramesh Raskar},
    publisher = {International Conference on Learning Representations},
    year = {2017},
}

% Tan et al. (2019): MnasNet, platform-aware neural architecture search for mobile.
@inproceedings{DBLP:conf/cvpr/TanCPVSHL19,
    title = {MnasNet: Platform-Aware Neural Architecture Search for Mobile},
    author = {Mingxing Tan and Bo Chen and Ruoming Pang and Vijay Vasudevan and
              Mark Sandler and Andrew Howard and Quoc V. Le},
    publisher = {IEEE Conference on Computer Vision and Pattern Recognition},
    pages = {2820--2828},
    year = {2019},
}

% Liu et al. (2018): hierarchical representations for efficient architecture search.
@inproceedings{DBLP:conf/iclr/LiuSVFK18,
    title = {Hierarchical Representations for Efficient Architecture Search},
    author = {Hanxiao Liu and Karen Simonyan and Oriol Vinyals and
              Chrisantha Fernando and Koray Kavukcuoglu},
    publisher = {International Conference on Learning Representations},
    year = {2018},
}

% Cai, Zhu and Han (2019): ProxylessNAS, direct architecture search on target task and hardware.
@inproceedings{DBLP:conf/iclr/CaiZH19,
    title = {ProxylessNAS: Direct Neural Architecture Search on Target Task and Hardware},
    author = {Han Cai and Ligeng Zhu and Song Han},
    publisher = {International Conference on Learning Representations},
    year = {2019},
}

% Liu et al. (2019): Auto-DeepLab, hierarchical architecture search for semantic segmentation.
@inproceedings{DBLP:conf/cvpr/LiuCSAHY019,
    title = {Auto-DeepLab: Hierarchical Neural Architecture Search for Semantic Image Segmentation},
    author = {Chenxi Liu and Liang-Chieh Chen and Florian Schroff and Hartwig Adam and
              Wei Hua and Alan L. Yuille and Fei-Fei Li},
    publisher = {IEEE Conference on Computer Vision and Pattern Recognition},
    pages = {82--92},
    year = {2019},
}

% Wu et al. (2019): FBNet, hardware-aware ConvNet design via differentiable architecture search.
@inproceedings{DBLP:conf/cvpr/WuDZWSWTVJK19,
    title = {FBNet: Hardware-Aware Efficient ConvNet Design via Differentiable Neural Architecture Search},
    author = {Bichen Wu and Xiaoliang Dai and Peizhao Zhang and Yanghan Wang and
              Fei Sun and Yiming Wu and Yuandong Tian and Peter Vajda and
              Yangqing Jia and Kurt Keutzer},
    publisher = {IEEE Conference on Computer Vision and Pattern Recognition},
    pages = {10734--10742},
    year = {2019},
}

% Xie et al. (2019): SNAS, stochastic neural architecture search.
@inproceedings{DBLP:conf/iclr/XieZLL19,
    title = {{SNAS:} stochastic neural architecture search},
    author = {Sirui Xie and Hehui Zheng and Chunxiao Liu and Liang Lin},
    publisher = {International Conference on Learning Representations},
    year = {2019},
}

% Li and Talwalkar (2019): random search and reproducibility for neural architecture search.
@inproceedings{DBLP:conf/uai/LiT19,
    title = {Random Search and Reproducibility for Neural Architecture Search},
    author = {Liam Li and Ameet Talwalkar},
    publisher = {Conference on Uncertainty in Artificial Intelligence},
    pages = {129},
    year = {2019},
}

% Dong and Yang (2019): searching for a robust neural architecture in four GPU hours.
@inproceedings{DBLP:conf/cvpr/DongY19,
    title = {Searching for a Robust Neural Architecture in Four {GPU} Hours},
    author = {Xuanyi Dong and Yi Yang},
    publisher = {IEEE Conference on Computer Vision and Pattern Recognition},
    pages = {1761--1770},
    year = {2019},
}

% Xu et al. (2020): PC-DARTS, partial channel connections for memory-efficient architecture search.
@inproceedings{DBLP:conf/iclr/XuX0CQ0X20,
    title = {{PC-DARTS:} Partial Channel Connections for Memory-Efficient Architecture Search},
    author = {Yuhui Xu and Lingxi Xie and Xiaopeng Zhang and Xin Chen and
              Guo-Jun Qi and Qi Tian and Hongkai Xiong},
    publisher = {International Conference on Learning Representations},
    year = {2020},
}

% Zela et al. (2020): understanding and robustifying differentiable architecture search.
@inproceedings{DBLP:conf/iclr/ZelaESMBH20,
    title = {Understanding and Robustifying Differentiable Architecture Search},
    author = {Arber Zela and Thomas Elsken and Tonmoy Saikia and
              Yassine Marrakchi and Thomas Brox and Frank Hutter},
    publisher = {International Conference on Learning Representations},
    year = {2020},
}

% Mei et al. (2020): AtomNAS, fine-grained end-to-end neural architecture search.
@inproceedings{DBLP:conf/iclr/MeiLLJYYY20,
    title = {AtomNAS: Fine-Grained End-to-End Neural Architecture Search},
    author = {Jieru Mei and Yingwei Li and Xiaochen Lian and Xiaojie Jin and
              Linjie Yang and Alan L. Yuille and Jianchao Yang},
    publisher = {International Conference on Learning Representations},
    year = {2020},
}

% Li et al. (2017): Hyperband, a bandit-based approach to hyperparameter optimization.
@article{DBLP:journals/jmlr/LiJDRT17,
    title = {Hyperband: {A} Novel Bandit-Based Approach to Hyperparameter Optimization},
    author = {Lisha Li and Kevin G. Jamieson and Giulia DeSalvo and
              Afshin Rostamizadeh and Ameet Talwalkar},
    journal = {Journal of Machine Learning Research},
    volume = {18},
    pages = {185:1--185:52},
    year = {2017},
}

% Liu et al. (2018): progressive neural architecture search.
@inproceedings{DBLP:conf/eccv/LiuZNSHLFYHM18,
    title = {Progressive Neural Architecture Search},
    author = {Chenxi Liu and Barret Zoph and Maxim Neumann and Jonathon Shlens and
              Wei Hua and Li-Jia Li and Li Fei-Fei and Alan L. Yuille and
              Jonathan Huang and Kevin Murphy},
    publisher = {European Conference on Computer Vision},
    volume = {11205},
    pages = {19--35},
    year = {2018},
}

% Fan et al. (2020): searching better architectures for neural machine translation.
% Journal name given as IEEE/ACM Transactions, the title this journal carries for volume 28.
@article{DBLP:journals/taslp/FanTXQLL20,
    title = {Searching Better Architectures for Neural Machine Translation},
    author = {Yang Fan and Fei Tian and Yingce Xia and Tao Qin and
              Xiang-Yang Li and Tie-Yan Liu},
    journal = {{IEEE/ACM} Transactions on Audio, Speech, and Language Processing},
    volume = {28},
    pages = {1574--1585},
    year = {2020},
}

% Chen et al. (2020): AdaBERT, task-adaptive BERT compression via differentiable architecture search.
@inproceedings{DBLP:conf/ijcai/ChenLQWLDDHLZ20,
    title = {AdaBERT: Task-Adaptive {BERT} Compression with Differentiable Neural Architecture Search},
    author = {Daoyuan Chen and Yaliang Li and Minghui Qiu and Zhen Wang and
              Bofang Li and Bolin Ding and Hongbo Deng and Jun Huang and
              Wei Lin and Jingren Zhou},
    publisher = {International Joint Conference on Artificial Intelligence},
    pages = {2463--2469},
    year = {2020},
}

% Wang et al. (2020): HAT, hardware-aware Transformers for efficient NLP.
@inproceedings{DBLP:conf/acl/WangWLCZGH20,
    title = {{HAT:} Hardware-Aware Transformers for Efficient Natural Language Processing},
    author = {Hanrui Wang and Zhanghao Wu and Zhijian Liu and Han Cai and
              Ligeng Zhu and Chuang Gan and Song Han},
    publisher = {Annual Meeting of the Association for Computational Linguistics},
    pages = {7675--7688},
    year = {2020},
}

% Cai et al. (2018): path-level network transformation for efficient architecture search.
@inproceedings{DBLP:conf/icml/CaiYZHY18,
    title = {Path-Level Network Transformation for Efficient Architecture Search},
    author = {Han Cai and Jiacheng Yang and Weinan Zhang and Song Han and Yong Yu},
    publisher = {International Conference on Machine Learning},
    volume = {80},
    pages = {677--686},
    year = {2018},
}

% Real et al. (2020): AutoML-Zero, evolving machine learning algorithms from scratch (CoRR preprint).
@article{DBLP:journals/corr/abs-2003-03384,
    title = {AutoML-Zero: Evolving Machine Learning Algorithms From Scratch},
    author = {Esteban Real and Chen Liang and David R. So and Quoc V. Le},
    journal = {CoRR},
    volume = {abs/2003.03384},
    year = {2020},
}

% Chollet (2017): Xception, deep learning with depthwise separable convolutions.
% A conference paper, so it is typed inproceedings with the venue in the publisher field
% like the other CVPR entries in this file; page range written with a double hyphen.
@inproceedings{Chollet2017XceptionDL,
    author = {Fran{\c{c}}ois Chollet},
    title = {Xception: Deep Learning with Depthwise Separable Convolutions},
    publisher = {IEEE Conference on Computer Vision and Pattern Recognition},
    pages = {1800--1807},
    year = {2017},
}

% Angeline, Saunders and Pollack (1994): an evolutionary algorithm that constructs recurrent networks.
@article{DBLP:journals/tnn/AngelineSP94,
    title = {An evolutionary algorithm that constructs recurrent neural networks},
    author = {Peter J. Angeline and Gregory M. Saunders and Jordan B. Pollack},
    journal = {IEEE Transactions on Neural Networks},
    volume = {5},
    number = {1},
    pages = {54--65},
    year = {1994},
}

% Stanley and Miikkulainen (2002): evolving neural networks through augmenting topologies.
% Journal name capitalised as a proper title, like the other journal names in this file.
@article{stanley2002evolving,
    author = {Stanley, Kenneth O and Miikkulainen, Risto},
    title = {Evolving neural networks through augmenting topologies},
    journal = {Evolutionary Computation},
    volume = {10},
    number = {2},
    pages = {99--127},
    year = {2002},
    publisher = {MIT Press},
}

@article{DBLP:journals/alife/StanleyDG09,
  author = {Stanley, Kenneth O. and D'Ambrosio, David B. and Gauci, Jason},
  title = {A Hypercube-Based Encoding for Evolving Large-Scale Neural Networks},
  journal = {Artificial Life},
  volume = {15},
  number = {2},
  pages = {185--212},
  publisher = {MIT Press},
  year = {2009},
}

@inproceedings{DBLP:conf/ijcai/SuganumaSN18,
  author = {Suganuma, Masanori and Shirakawa, Shinichi and Nagao, Tomoharu},
  title = {A Genetic Programming Approach to Designing Convolutional Neural Network Architectures},
  publisher = {International Joint Conference on Artificial Intelligence},
  pages = {5369--5373},
  year = {2018},
}

@inproceedings{DBLP:conf/iccv/XieY17,
  author = {Xie, Lingxi and Yuille, Alan L.},
  title = {Genetic {CNN}},
  publisher = {IEEE International Conference on Computer Vision},
  pages = {1388--1397},
  year = {2017},
}

@inproceedings{DBLP:conf/cvpr/ZhongYWSL18,
  author = {Zhong, Zhao and Yan, Junjie and Wu, Wei and Shao, Jing and Liu, Cheng-Lin},
  title = {Practical Block-Wise Neural Network Architecture Generation},
  publisher = {IEEE Conference on Computer Vision and Pattern Recognition},
  pages = {2423--2432},
  year = {2018},
}

@inproceedings{DBLP:conf/icml/BergstraYC13,
  author = {Bergstra, James and Yamins, Daniel and Cox, David D.},
  title = {Making a Science of Model Search: Hyperparameter Optimization in Hundreds of Dimensions for Vision Architectures},
  publisher = {International Conference on Machine Learning},
  volume = {28},
  pages = {115--123},
  year = {2013},
}

@inproceedings{DBLP:conf/ijcai/DomhanSH15,
  author = {Domhan, Tobias and Springenberg, Jost Tobias and Hutter, Frank},
  title = {Speeding Up Automatic Hyperparameter Optimization of Deep Neural Networks by Extrapolation of Learning Curves},
  publisher = {International Joint Conference on Artificial Intelligence},
  pages = {3460--3468},
  year = {2015},
}

@inproceedings{DBLP:conf/icml/MendozaKFSH16,
  author = {Mendoza, Hector and Klein, Aaron and Feurer, Matthias and Springenberg, Jost Tobias and Hutter, Frank},
  title = {Towards Automatically-Tuned Neural Networks},
  publisher = {International Conference on Machine Learning},
  volume = {64},
  pages = {58--65},
  year = {2016},
}

@article{DBLP:journals/corr/abs-1807-06906,
  author = {Zela, Arber and Klein, Aaron and Falkner, Stefan and Hutter, Frank},
  title = {Towards Automated Deep Learning: Efficient Joint Neural Architecture and Hyperparameter Search},
  journal = {International Conference on Machine Learning},
  year = {2018},
}

@article{li2020automated,
  author    = {Li, Jihao and Diao, Wenhui and Sun, Xian and Feng, Yingchao and Zhang, Wenkai and Chang, Zhonghan and Fu, Kun},
  title     = {Automated and Lightweight Network Design via Random Search for Remote Sensing Image Scene Classification},
  journal   = {The International Archives of Photogrammetry, Remote Sensing and Spatial Information Sciences},
  volume    = {43},
  pages     = {1217--1224},
  year      = {2020}
}

@inproceedings{DBLP:conf/cvpr/BenderLCCCKL20,
  author = {Bender, Gabriel and Liu, Hanxiao and Chen, Bo and Chu, Grace and Cheng, Shuyang and Kindermans, Pieter-Jan and Le, Quoc V.},
  title = {Can Weight Sharing Outperform Random Architecture Search? An Investigation With TuNAS},
  publisher = {IEEE Conference on Computer Vision and Pattern Recognition},
  pages = {14311--14320},
  year = {2020},
}

@inproceedings{DBLP:conf/aistats/KleinFBHH17,
  author = {Klein, Aaron and Falkner, Stefan and Bartels, Simon and Hennig, Philipp and Hutter, Frank},
  title = {Fast Bayesian Optimization of Machine Learning Hyperparameters on Large Datasets},
  publisher = {International Conference on Artificial Intelligence and Statistics},
  volume = {54},
  pages = {528--536},
  year = {2017},
}

@article{DBLP:journals/corr/ChrabaszczLH17,
  author = {Chrabaszcz, Patryk and Loshchilov, Ilya and Hutter, Frank},
  title = {A Downsampled Variant of ImageNet as an Alternative to the {CIFAR} datasets},
  journal = {CoRR},
  volume = {abs/1707.08819},
  year = {2017},
}

@inproceedings{DBLP:conf/aaai/CaiCZYW18,
  author = {Cai, Han and Chen, Tianyao and Zhang, Weinan and Yu, Yong and Wang, Jun},
  title = {Efficient Architecture Search by Network Transformation},
  publisher = {AAAI Conference on Artificial Intelligence},
  pages = {2787--2794},
  year = {2018},
}

@inproceedings{DBLP:conf/iclr/ElskenMH18,
  author = {Elsken, Thomas and Metzen, Jan Hendrik and Hutter, Frank},
  title = {Simple and efficient architecture search for Convolutional Neural Networks},
  publisher = {International Conference on Learning Representations},
  year = {2018},
}

@inproceedings{DBLP:conf/icml/BenderKZVL18,
  author = {Bender, Gabriel and Kindermans, Pieter-Jan and Zoph, Barret and Vasudevan, Vijay and Le, Quoc V.},
  title = {Understanding and Simplifying One-Shot Architecture Search},
  publisher = {International Conference on Machine Learning},
  volume = {80},
  pages = {549--558},
  year = {2018},
}

@inproceedings{DBLP:conf/nips/SaxenaV16,
  author = {Saxena, Shreyas and Verbeek, Jakob},
  title = {Convolutional Neural Fabrics},
  publisher = {Advances in Neural Information Processing Systems},
  pages = {4053--4061},
  year = {2016},
}

@inproceedings{DBLP:conf/iclr/KleinFSH17,
  author = {Klein, Aaron and Falkner, Stefan and Springenberg, Jost Tobias and Hutter, Frank},
  title = {Learning Curve Prediction with Bayesian Neural Networks},
  publisher = {International Conference on Learning Representations},
  year = {2017},
}

@inproceedings{DBLP:conf/iclr/BakerGRN18,
  author = {Baker, Bowen and Gupta, Otkrist and Raskar, Ramesh and Naik, Nikhil},
  title = {Accelerating Neural Architecture Search using Performance Prediction},
  publisher = {International Conference on Learning Representations},
  year = {2018},
}

@inproceedings{DBLP:conf/wmt/XiaTTGHCFGLLWWZ19,
  author = {Xia, Yingce and Tan, Xu and Tian, Fei and Gao, Fei and He, Di and
            Chen, Weicong and Fan, Yang and Gong, Linyuan and Leng, Yichong and
            Luo, Renqian and Wang, Yiren and Wu, Lijun and Zhu, Jinhua and
            Qin, Tao and Liu, Tie-Yan},
  title = {Microsoft Research Asia's Systems for {WMT19}},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {424--433},
  year = {2019},
}

@inproceedings{DBLP:conf/iclr/RamachandranZL18,
  author = {Ramachandran, Prajit and Zoph, Barret and Le, Quoc V.},
  title = {Searching for Activation Functions},
  publisher = {International Conference on Learning Representations},
  year = {2018},
}

@article{DBLP:journals/corr/abs-2009-02070,
  author = {Zhu, Wei and Wang, Xiaoling and Qiu, Xipeng and Ni, Yuan and Xie, Guotong},
  title = {AutoTrans: Automating Transformer Design via Reinforced Architecture Search},
  journal = {CoRR},
  volume = {abs/2009.02070},
  year = {2020},
}

@inproceedings{DBLP:conf/acl/WangWLCZGH20,
  author = {Wang, Hanrui and Wu, Zhanghao and Liu, Zhijian and Cai, Han and Zhu, Ligeng and Gan, Chuang and Han, Song},
  title = {{HAT:} Hardware-Aware Transformers for Efficient Natural Language Processing},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {7675--7688},
  year = {2020},
}

@article{DBLP:journals/corr/abs-2008-06808,
  author = {Tsai, Henry and Ooi, Jayden and Ferng, Chun-Sung and Chung, Hyung Won and Riesa, Jason},
  title = {Finding Fast Transformers: One-Shot Neural Architecture Search by Component Composition},
  journal = {CoRR},
  volume = {abs/2008.06808},
  year = {2020},
}

@inproceedings{Wang2019ExploitingSC,
  author    = {Wang, Xing and
               Tu, Zhaopeng and
               Wang, Longyue and
               Shi, Shuming},
  title     = {Exploiting Sentential Context for Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2019}
}

@inproceedings{Wei2020MultiscaleCD,
  author    = {Wei, Xiangpeng and
               Yu, Heng and
               Hu, Yue and
               Zhang, Yue and
               Weng, Rongxiang and
               Luo, Weihua},
  title     = {Multiscale Collaborative Deep Models for Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2020}
}

@article{li2020shallow,
  author    = {Li, Bei and
               Wang, Ziyang and
               Liu, Hui and
               Jiang, Yufan and
               Du, Quan and
               Xiao, Tong and
               Wang, Huizhen and
               Zhu, Jingbo},
  title     = {Shallow-to-Deep Training for Neural Machine Translation},
  journal   = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2020}
}

@article{DBLP:journals/corr/abs-2007-06257,
  author = {Xu, Hongfei and Liu, Qiuhui and Xiong, Deyi and van Genabith, Josef},
  title = {Transformer with Depth-Wise {LSTM}},
  journal = {CoRR},
  volume = {abs/2007.06257},
  year = {2020},
}

@inproceedings{DBLP:conf/acl/XuLGXZ20,
  author = {Xu, Hongfei and Liu, Qiuhui and van Genabith, Josef and Xiong, Deyi and Zhang, Jingyi},
  title = {Lipschitz Constrained Parameter Initialization for Deep Transformers},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {397--402},
  year = {2020},
}

@article{DBLP:journals/corr/abs-2006-10369,
  author = {Kasai, Jungo and Pappas, Nikolaos and Peng, Hao and Cross, James and Smith, Noah A.},
  title = {Deep Encoder, Shallow Decoder: Reevaluating the Speed-Quality Tradeoff in Machine Translation},
  journal = {CoRR},
  volume = {abs/2006.10369},
  year = {2020},
}


%%%%% chapter 15------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% chapter 16------------------------------------------------------
@inproceedings{DBLP:conf/wmt/CurreyBH17,
  author    = {Anna Currey and
               Miceli Barone, Antonio Valerio and
               Kenneth Heafield},
  title     = {Copied Monolingual Data Improves Low-Resource Neural Machine Translation},
  pages     = {148--156},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2017}
}

@inproceedings{DBLP:conf/emnlp/EdunovOAG18,
  author = {Edunov, Sergey and Ott, Myle and Auli, Michael and Grangier, David},
  title = {Understanding Back-Translation at Scale},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {489--500},
  year = {2018},
}
@inproceedings{DBLP:conf/emnlp/FadaeeM18,
  author = {Fadaee, Marzieh and Monz, Christof},
  title = {Back-Translation Sampling by Targeting Difficult Words in Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {436--446},
  year = {2018},
}
@inproceedings{DBLP:conf/nlpcc/XuLXLLXZ19,
  author = {Xu, Nuo and Li, Yinqiao and Xu, Chen and Li, Yanyang and Li, Bei and Xiao, Tong and Zhu, Jingbo},
  title = {Analysis of Back-Translation Methods for Low-Resource Neural Machine Translation},
  publisher = {Springer},
  volume = {11839},
  pages = {466--475},
  year = {2019},
}
@inproceedings{DBLP:conf/wmt/CaswellCG19,
  author = {Caswell, Isaac and Chelba, Ciprian and Grangier, David},
  title = {Tagged Back-Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {53--63},
  year = {2019},
}
@inproceedings{DBLP:conf/emnlp/WangLWLS19,
  author = {Wang, Shuo and Liu, Yang and Wang, Chao and Luan, Huanbo and Sun, Maosong},
  title = {Improving Back-Translation with Uncertainty-based Confidence Estimation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {791--802},
  year = {2019},
}
@article{DBLP:journals/corr/abs200111327,
  author    = {Idris Abdulmumin and
               Bashir Shehu Galadanci and
               Abubakar Isa},
  title     = {Iterative Batch Back-Translation for Neural Machine Translation: {A}
               Conceptual Model},
  journal   = {CoRR},
  volume    = {abs/2001.11327},
  year      = {2020}
}
@article{DBLP:journals/corr/abs200403672,
  author    = {Zi-Yi Dou and
               Antonios Anastasopoulos and
               Graham Neubig},
  title     = {Dynamic Data Selection and Weighting for Iterative Back-Translation},
  journal   = {CoRR},
  volume    = {abs/2004.03672},
  year      = {2020}
}
@inproceedings{DBLP:conf/emnlp/WuZHGQLL19,
  author = {Wu, Lijun and Zhu, Jinhua and He, Di and Gao, Fei and Qin, Tao and Lai, Jianhuang and Liu, Tie-Yan},
  title = {Machine Translation With Weakly Paired Documents},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {4374--4383},
  year = {2019},
}
@article{DBLP:journals/corr/abs-1901-09069,
  author    = {Felipe Almeida and
               Geraldo Xex{\'{e}}o},
  title     = {Word Embeddings: {A} Survey},
  journal   = {CoRR},
  volume    = {abs/1901.09069},
  year      = {2019}
}
@article{DBLP:journals/corr/abs-2002-06823,
  author    = {Jinhua Zhu and
               Yingce Xia and
               Lijun Wu and
               Di He and
               Tao Qin and
               Wengang Zhou and
               Houqiang Li and
               Tie-Yan Liu},
  title     = {Incorporating {BERT} into Neural Machine Translation},
  journal   = {CoRR},
  volume    = {abs/2002.06823},
  year      = {2020}
}
@inproceedings{song2019mass,
  author = {Song, Kaitao and Tan, Xu and Qin, Tao and Lu, Jianfeng and Liu, Tie-Yan},
  title = {{MASS:} Masked Sequence to Sequence Pre-training for Language Generation},
  publisher = {{PMLR}},
  volume = {97},
  pages = {5926--5936},
  year = {2019},
}
@article{DBLP:journals/corr/Ruder17a,
  author = {Ruder, Sebastian},
  title = {An Overview of Multi-Task Learning in Deep Neural Networks},
  journal = {CoRR},
  volume = {abs/1706.05098},
  year = {2017},
}
@inproceedings{DBLP:conf/emnlp/DomhanH17,
  author = {Domhan, Tobias and Hieber, Felix},
  title = {Using Target-side Monolingual Data for Neural Machine Translation through Multi-task Learning},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {1500--1505},
  year = {2017},
}
@inproceedings{DBLP:conf/icml/XiaQCBYL17,
  author = {Xia, Yingce and Qin, Tao and Chen, Wei and Bian, Jiang and Yu, Nenghai and Liu, Tie-Yan},
  title = {Dual Supervised Learning},
  publisher = {{PMLR}},
  volume = {70},
  pages = {3789--3798},
  year = {2017},
}
@inproceedings{DBLP:conf/iccv/ZhuPIE17,
  author = {Zhu, Jun-Yan and Park, Taesung and Isola, Phillip and Efros, Alexei A.},
  title = {Unpaired Image-to-Image Translation Using Cycle-Consistent Adversarial Networks},
  publisher = {{IEEE} Computer Society},
  pages = {2242--2251},
  year = {2017},
}
@inproceedings{DBLP:conf/nips/HeXQWYLM16,
  author    = {Di He and
               Yingce Xia and
               Tao Qin and
               Liwei Wang and
               Nenghai Yu and
               Tie-Yan Liu and
               Wei-Ying Ma},
  title     = {Dual Learning for Machine Translation},
  pages     = {820--828},
  publisher = {Advances in Neural Information Processing Systems},
  year      = {2016}
}
@inproceedings{DBLP:conf/nips/SuttonMSM99,
  author = {Sutton, Richard S. and McAllester, David A. and Singh, Satinder P. and Mansour, Yishay},
  title = {Policy Gradient Methods for Reinforcement Learning with Function Approximation},
  publisher = {The {MIT} Press},
  pages = {1057--1063},
  year = {1999},
}
% Same work as DBLP:conf/nips/ConneauL19; both keys are kept so existing citations keep resolving.
@inproceedings{lample2019cross,
  author    = {Alexis Conneau and
               Guillaume Lample},
  title     = {Cross-lingual Language Model Pretraining},
  pages     = {7057--7067},
  publisher = {Advances in Neural Information Processing Systems},
  year      = {2019}
}
@inproceedings{DBLP:conf/aclnmt/HoangKHC18,
  author = {Hoang, Cong Duy Vu and Koehn, Philipp and Haffari, Gholamreza and Cohn, Trevor},
  title = {Iterative Back-Translation for Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {18--24},
  year = {2018},
}
@inproceedings{DBLP:conf/icml/OttAGR18,
  author = {Ott, Myle and Auli, Michael and Grangier, David and Ranzato, Marc'Aurelio},
  title = {Analyzing Uncertainty in Neural Machine Translation},
  publisher = {{PMLR}},
  volume = {80},
  pages = {3953--3962},
  year = {2018},
}
@inproceedings{DBLP:conf/acl/FadaeeBM17a,
  author = {Fadaee, Marzieh and Bisazza, Arianna and Monz, Christof},
  title = {Data Augmentation for Low-Resource Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {567--573},
  year = {2017},
}
@inproceedings{finding2006adafre,
  author    = {S. F. Adafre and Maarten de Rijke},
  title     = {Finding Similar Sentences across Multiple Languages in Wikipedia},
  publisher = {European Association of Computational Linguistics},
  year      = {2006}
}
@inproceedings{method2008keiji,
  author = {Yasuda, Keiji and Sumita, Eiichiro},
  title = {Method for building sentence-aligned corpus from wikipedia},
  publisher = {AAAI Conference on Artificial Intelligence},
  year = {2008},
}
@article{DBLP:journals/coling/MunteanuM05,
  author = {Munteanu, Dragos Stefan and Marcu, Daniel},
  title = {Improving Machine Translation Performance by Exploiting Non-Parallel Corpora},
  journal = {Computational Linguistics},
  volume = {31},
  number = {4},
  pages = {477--504},
  year = {2005},
}
@inproceedings{DBLP:conf/naacl/SmithQT10,
  author = {Smith, Jason R. and Quirk, Chris and Toutanova, Kristina},
  title = {Extracting Parallel Sentences from Comparable Corpora using Document Level Alignment},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {403--411},
  year = {2010},
}
@inproceedings{DBLP:conf/emnlp/ZhangZ16,
  author = {Zhang, Jiajun and Zong, Chengqing},
  title = {Exploiting Source-side Monolingual Data in Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {1535--1545},
  year = {2016},
}
@inproceedings{DBLP:conf/acl/XiaKAN19,
  author = {Xia, Mengzhou and Kong, Xiang and Anastasopoulos, Antonios and Neubig, Graham},
  title = {Generalized Data Augmentation for Low-Resource Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {5786--5796},
  year = {2019},
}
@inproceedings{DBLP:conf/emnlp/WangPDN18,
  author = {Wang, Xinyi and Pham, Hieu and Dai, Zihang and Neubig, Graham},
  title = {SwitchOut: an Efficient Data Augmentation Algorithm for Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {856--861},
  year = {2018},
}
@inproceedings{DBLP:conf/acl/GaoZWXQCZL19,
  author = {Gao, Fei and Zhu, Jinhua and Wu, Lijun and Xia, Yingce and Qin, Tao and Cheng, Xueqi and Zhou, Wengang and Liu, Tie-Yan},
  title = {Soft Contextual Data Augmentation for Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {5539--5544},
  year = {2019},
}
% Duplicate definition of DBLP:conf/emnlp/WangLWLS19 removed here: the identical entry is already defined earlier in this chapter, and a repeated key makes BibTeX report "Repeated entry".
@inproceedings{DBLP:conf/emnlp/WuWXQLL19,
  author = {Wu, Lijun and Wang, Yiren and Xia, Yingce and Qin, Tao and Lai, Jianhuang and Liu, Tie-Yan},
  title = {Exploiting Monolingual Data at Scale for Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {4205--4215},
  year = {2019},
}
@inproceedings{DBLP:conf/emnlp/LiLHZZ19,
  author = {Li, Guanlin and Liu, Lemao and Huang, Guoping and Zhu, Conghui and Zhao, Tiejun},
  title = {Understanding Data Augmentation in Neural Machine Translation: Two Perspectives towards Generalization},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {5688--5694},
  year = {2019},
}
@article{2015OnGulcehre,
  title = {On Using Monolingual Corpora in Neural Machine Translation},
  author = {Gulcehre, Caglar and
            Firat, Orhan and
            Xu, Kelvin and
            Cho, Kyunghyun and
            Barrault, Loic and
            Lin, Huei Chi and
            Bougares, Fethi and
            Schwenk, Holger and
            Bengio, Yoshua},
  journal = {CoRR},
  volume = {abs/1503.03535},
  year = {2015},
}

@phdthesis{黄书剑0统计机器翻译中的词对齐研究,
  title={统计机器翻译中的词对齐研究},
  author={黄书剑},
  school={南京大学},
  publisher={南京大学},
  year={2012}
}
@article{DBLP:journals/corr/MikolovLS13,
  author = {Mikolov, Tomas and Le, Quoc V. and Sutskever, Ilya},
  title = {Exploiting Similarities among Languages for Machine Translation},
  journal = {CoRR},
  volume = {abs/1309.4168},
  year = {2013},
}
@inproceedings{DBLP:conf/acl/VulicK16,
  author = {Vulic, Ivan and Korhonen, Anna},
  title = {On the Role of Seed Lexicons in Learning Bilingual Word Embeddings},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2016},
}
@inproceedings{DBLP:conf/iclr/SmithTHH17,
  author = {Smith, Samuel L. and Turban, David H. P. and Hamblin, Steven and Hammerla, Nils Y.},
  title = {Offline bilingual word vectors, orthogonal transformations and the inverted softmax},
  publisher = {International Conference on Learning Representations},
  year = {2017},
}
@inproceedings{DBLP:conf/acl/ArtetxeLA17,
  author = {Artetxe, Mikel and Labaka, Gorka and Agirre, Eneko},
  title = {Learning bilingual word embeddings with (almost) no bilingual data},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {451--462},
  year = {2017},
}
@article{1966ASchnemann,
  title={A generalized solution of the orthogonal procrustes problem},
  author={Sch{\"o}nemann, Peter H.},
  journal={Psychometrika},
  volume={31},
  number={1},
  pages={1--10},
  year={1966},
}

@inproceedings{DBLP:conf/iclr/LampleCRDJ18,
  author = {Lample, Guillaume and Conneau, Alexis and Ranzato, Marc'Aurelio and Denoyer, Ludovic and J{\'{e}}gou, Herv{\'{e}}},
  title = {Word translation without parallel data},
  publisher = {International Conference on Learning Representations},
  year = {2018},
}
@inproceedings{DBLP:conf/acl/ZhangLLS17,
  author = {Zhang, Meng and Liu, Yang and Luan, Huanbo and Sun, Maosong},
  title = {Adversarial Training for Unsupervised Bilingual Lexicon Induction},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {1959--1970},
  year = {2017},
}
@inproceedings{DBLP:conf/emnlp/XuYOW18,
  author = {Xu, Ruochen and Yang, Yiming and Otani, Naoki and Wu, Yuexin},
  title = {Unsupervised Cross-lingual Transfer of Word Embedding Spaces},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {2465--2474},
  year = {2018},
}
@inproceedings{DBLP:conf/emnlp/Alvarez-MelisJ18,
  author = {Alvarez-Melis, David and Jaakkola, Tommi S.},
  title = {Gromov-Wasserstein Alignment of Word Embedding Spaces},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {1881--1890},
  year = {2018},
}
@inproceedings{DBLP:conf/lrec/GarneauGBDL20,
  author = {Garneau, Nicolas and Godbout, Mathieu and Beauchemin, David and Durand, Audrey and Lamontagne, Luc},
  title = {A Robust Self-Learning Method for Fully Unsupervised Cross-Lingual Mappings of Word Embeddings: Making the Method Robustly Reproducible as Well},
  publisher = {European Language Resources Association},
  pages = {5546--5554},
  year = {2020},
}
@inproceedings{DBLP:conf/naacl/XingWLL15,
  author = {Xing, Chao and Wang, Dong and Liu, Chao and Lin, Yiye},
  title = {Normalized Word Embedding and Orthogonal Transform for Bilingual Word Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {1006--1011},
  year = {2015},
}
% Duplicate definition of DBLP:conf/iclr/SmithTHH17 removed here: the identical entry is already defined earlier in this chapter, and a repeated key makes BibTeX report "Repeated entry".
@inproceedings{DBLP:conf/emnlp/VulicGRK19,
  author = {Vulic, Ivan and Glavas, Goran and Reichart, Roi and Korhonen, Anna},
  title = {Do We Really Need Fully Unsupervised Cross-Lingual Embeddings?},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {4406--4417},
  year = {2019},
}
@inproceedings{DBLP:conf/acl/SogaardVR18,
  author = {S{\o}gaard, Anders and Ruder, Sebastian and Vulic, Ivan},
  title = {On the Limitations of Unsupervised Bilingual Dictionary Induction},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {778--788},
  year = {2018},
}
@article{DBLP:journals/talip/MarieF20,
  author = {Marie, Benjamin and Fujita, Atsushi},
  title = {Iterative Training of Unsupervised Neural and Statistical Machine Translation Systems},
  journal = {{ACM} Trans. Asian Low Resour. Lang. Inf. Process.},
  volume = {19},
  number = {5},
  pages = {68:1--68:21},
  year = {2020},
}
@inproceedings{DBLP:conf/acl/ArtetxeLA19,
  author = {Artetxe, Mikel and Labaka, Gorka and Agirre, Eneko},
  title = {An Effective Approach to Unsupervised Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {194--203},
  year = {2019},
}
@inproceedings{DBLP:conf/acl/PourdamghaniAGK19,
  author = {Pourdamghani, Nima and Aldarrab, Nada and Ghazvininejad, Marjan and Knight, Kevin and May, Jonathan},
  title = {Translating Translationese: {A} Two-Step Approach to Unsupervised Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {3057--3062},
  year = {2019},
}
@inproceedings{DBLP:conf/iclr/LampleCDR18,
  author = {Lample, Guillaume and Conneau, Alexis and Denoyer, Ludovic and Ranzato, Marc'Aurelio},
  title = {Unsupervised Machine Translation Using Monolingual Corpora Only},
  publisher = {International Conference on Learning Representations},
  year = {2018},
}
% Same work as lample2019cross; both keys are kept so existing citations keep resolving.
@inproceedings{DBLP:conf/nips/ConneauL19,
  author    = {Alexis Conneau and
               Guillaume Lample},
  title     = {Cross-lingual Language Model Pretraining},
  pages     = {7057--7067},
  publisher = {Advances in Neural Information Processing Systems},
  year      = {2019}
}
@article{DBLP:journals/ipm/FarhanTAJATT20,
  author = {Farhan, Wael and Talafha, Bashar and Abuammar, Analle and Jaikat, Ruba and Al-Ayyoub, Mahmoud and Tarakji, Ahmad Bisher and Toma, Anas},
  title = {Unsupervised dialectal neural machine translation},
  journal = {Information Processing \& Management},
  volume = {57},
  number = {3},
  pages = {102181},
  year = {2020},
}
@inproceedings{A2020Li,
  title={A Simple and Effective Approach to Robust Unsupervised Bilingual Dictionary Induction},
  author={Yanyang Li and Yingfeng Luo and Ye Lin and Quan Du and Huizhen Wang and Shujian Huang and Tong Xiao and Jingbo Zhu},
  publisher={International Conference on Computational Linguistics},
  year={2020}
}

@inproceedings{2018When,
  author    = {Qi, Ye and
               Sachan, Devendra Singh and
               Felix, Matthieu and
               Padmanabhan, Sarguna Janani and
               Neubig, Graham},
  title     = {When and Why are Pre-trained Word Embeddings Useful for Neural Machine Translation?},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  year      = {2018}
}
@inproceedings{DBLP:conf/emnlp/ClinchantJN19,
  author = {Clinchant, St{\'{e}}phane and Jung, Kweon Woo and Nikoulina, Vassilina},
  title = {On the use of {BERT} for Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {108--117},
  year = {2019},
}
% Imamura and Sumita (2019): reusing a pre-trained BERT encoder for neural MT.
% The booktitle names a workshop held with EMNLP-IJCNLP 2019, so the venue stored in
% `publisher` is EMNLP rather than the ACL annual meeting.
@inproceedings{DBLP:conf/emnlp/ImamuraS19,
  author    = {Kenji Imamura and
               Eiichiro Sumita},
  title     = {Recycling a Pre-trained {BERT} Encoder for Neural Machine Translation},
  booktitle = {Proceedings of the 3rd Workshop on Neural Generation and Translation@EMNLP-IJCNLP 2019, Hong Kong, November 4, 2019},
  pages     = {23--31},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2019}
}
% Yang et al. (2020): making the most of BERT in neural machine translation.
@inproceedings{DBLP:conf/aaai/YangW0Z00020,
  author = {Yang, Jiacheng and Wang, Mingxuan and Zhou, Hao and Zhao, Chengqi and Zhang, Weinan and Yu, Yong and Li, Lei},
  title = {Towards Making the Most of {BERT} in Neural Machine Translation},
  publisher = {AAAI Conference on Artificial Intelligence},
  year = {2020},
  pages = {9378--9385},
}
% Weng et al. (2020): transferring knowledge from pre-trained models into neural MT.
@inproceedings{DBLP:conf/aaai/WengYHCL20,
  author = {Weng, Rongxiang and Yu, Heng and Huang, Shujian and Cheng, Shanbo and Luo, Weihua},
  title = {Acquiring Knowledge from Pre-Trained Model to Neural Machine Translation},
  publisher = {AAAI Conference on Artificial Intelligence},
  year = {2020},
  pages = {9266--9273},
}
% Liu et al. (2020): multilingual denoising pre-training for neural MT (CoRR preprint).
@article{DBLP:journals/corr/abs-2001-08210,
  author = {Liu, Yinhan and Gu, Jiatao and Goyal, Naman and Li, Xian and Edunov, Sergey and Ghazvininejad, Marjan and Lewis, Mike and Zettlemoyer, Luke},
  title = {Multilingual Denoising Pre-training for Neural Machine Translation},
  journal = {CoRR},
  year = {2020},
  volume = {abs/2001.08210},
}
% Ji et al. (2020): cross-lingual pre-training based transfer for zero-shot neural MT.
% The same title and authors also appear in this file under the key ji2020cross.
@inproceedings{DBLP:conf/aaai/JiZDZCL20,
  author = {Ji, Baijun and Zhang, Zhirui and Duan, Xiangyu and Zhang, Min and Chen, Boxing and Luo, Weihua},
  title = {Cross-Lingual Pre-Training Based Transfer for Zero-Shot Neural Machine Translation},
  publisher = {AAAI Conference on Artificial Intelligence},
  year = {2020},
  pages = {115--122},
}
% Lewis et al. (2020): BART, denoising sequence-to-sequence pre-training.
@inproceedings{DBLP:conf/acl/LewisLGGMLSZ20,
  author = {Lewis, Mike and Liu, Yinhan and Goyal, Naman and Ghazvininejad, Marjan and Mohamed, Abdelrahman and Levy, Omer and Stoyanov, Veselin and Zettlemoyer, Luke},
  title = {{BART:} Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2020},
  pages = {7871--7880},
}
% Yang et al. (2020): code-switching pre-training for neural MT (CoRR preprint).
@article{DBLP:journals/corr/abs-2009-08088,
  author = {Yang, Zhen and Hu, Bojie and Han, Ambyera and Huang, Shen and Ju, Qi},
  title = {Code-switching pre-training for neural machine translation},
  journal = {CoRR},
  year = {2020},
  volume = {abs/2009.08088},
}
% Varis and Bojar (2020): unsupervised pretraining for neural MT with elastic weight
% consolidation (CoRR preprint).
@article{DBLP:journals/corr/abs-2010-09403,
  author = {Varis, Dusan and Bojar, Ondrej},
  title = {Unsupervised Pretraining for Neural Machine Translation Using Elastic Weight Consolidation},
  journal = {CoRR},
  year = {2020},
  volume = {abs/2010.09403},
}
% Lample et al. (2018): phrase-based and neural unsupervised machine translation.
% EMNLP paper (see key); venue string matches the other EMNLP entries in this file.
@inproceedings{DBLP:conf/emnlp/LampleOCDR18,
  author    = {Guillaume Lample and
               Myle Ott and
               Alexis Conneau and
               Ludovic Denoyer and
               Marc'Aurelio Ranzato},
  title     = {Phrase-Based {\&} Neural Unsupervised Machine Translation},
  pages     = {5039--5049},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2018}
}
% Shorten and Khoshgoftaar (2019): survey of image data augmentation for deep learning.
% Journal name stored in full so the bibliography style decides on abbreviation.
@article{DBLP:journals/jbd/ShortenK19,
  author    = {Connor Shorten and
               Taghi M. Khoshgoftaar},
  title     = {A survey on Image Data Augmentation for Deep Learning},
  journal   = {Journal of Big Data},
  volume    = {6},
  pages     = {60},
  year      = {2019}
}
% Mohiuddin and Joty (2019): adversarial autoencoder for unsupervised word translation.
% NAACL paper (see key); replaces the garbled "Annual Meeting of the Annual Meeting of ..." venue.
@inproceedings{DBLP:conf/naacl/MohiuddinJ19,
  author    = {Tasnim Mohiuddin and
               Shafiq R. Joty},
  title     = {Revisiting Adversarial Autoencoder for Unsupervised Word Translation
               with Cycle Consistency and Improved Training},
  pages     = {3857--3867},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  year      = {2019}
}
% Huang, Qiu and Church (2019): hubless nearest-neighbor search for bilingual lexicon induction.
% Venue string de-duplicated (was "Annual Meeting of the Annual Meeting of ...").
@inproceedings{DBLP:conf/acl/HuangQC19,
  author    = {Jiaji Huang and
               Qiang Qiu and
               Kenneth Church},
  title     = {Hubless Nearest Neighbor Search for Bilingual Lexicon Induction},
  pages     = {4072--4080},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2019}
}

% Alaux et al. (2018): unsupervised hyperalignment for multilingual word embeddings (CoRR preprint).
@article{DBLP:journals/corr/abs-1811-01124,
  author = {Alaux, Jean and Grave, Edouard and Cuturi, Marco and Joulin, Armand},
  title = {Unsupervised Hyperalignment for Multilingual Word Embeddings},
  journal = {CoRR},
  year = {2018},
  volume = {abs/1811.01124},
}
% Xu et al. (2018): unsupervised cross-lingual transfer of word embedding spaces.
% EMNLP paper (see key); replaces the garbled "Annual Meeting of the Annual Meeting of ..." venue.
@inproceedings{DBLP:conf/emnlp/XuYOW18,
  author    = {Ruochen Xu and
               Yiming Yang and
               Naoki Otani and
               Yuexin Wu},
  title     = {Unsupervised Cross-lingual Transfer of Word Embedding Spaces},
  pages     = {2465--2474},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2018}
}
% Dou, Zhou and Huang (2018): bilingual lexicon induction with latent variable models.
% EMNLP paper (see key); replaces the garbled "Annual Meeting of the Annual Meeting of ..." venue.
@inproceedings{DBLP:conf/emnlp/DouZH18,
  author    = {Zi-Yi Dou and
               Zhi-Hao Zhou and
               Shujian Huang},
  title     = {Unsupervised Bilingual Lexicon Induction via Latent Variable Models},
  pages     = {621--626},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2018}
}
% Hoshen and Wolf (2018): non-adversarial unsupervised word translation.
% EMNLP paper (see key); replaces the garbled "Annual Meeting of the Annual Meeting of ..." venue.
@inproceedings{DBLP:conf/emnlp/HoshenW18,
  author    = {Yedid Hoshen and
               Lior Wolf},
  title     = {Non-Adversarial Unsupervised Word Translation},
  pages     = {469--478},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2018}
}
% Kim, Geng and Ney (2018): unsupervised word-by-word translation with a language model
% and a denoising autoencoder.
% EMNLP paper (see key); replaces the garbled "Annual Meeting of the Annual Meeting of ..." venue.
@inproceedings{DBLP:conf/emnlp/KimGN18,
  author    = {Yunsu Kim and
               Jiahui Geng and
               Hermann Ney},
  title     = {Improving Unsupervised Word-by-Word Translation with Language Model
               and Denoising Autoencoder},
  pages     = {862--868},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2018}
}
% Mukherjee, Yamada and Hospedales (2018): unsupervised word translation without adversaries.
% EMNLP paper (see key); replaces the garbled "Annual Meeting of the Annual Meeting of ..." venue.
@inproceedings{DBLP:conf/emnlp/MukherjeeYH18,
  author    = {Tanmoy Mukherjee and
               Makoto Yamada and
               Timothy M. Hospedales},
  title     = {Learning Unsupervised Word Translations Without Adversaries},
  pages     = {627--632},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2018}
}
% Joulin et al. (2018): bilingual word mapping learned with a retrieval criterion.
% EMNLP paper (see key); replaces the garbled "Annual Meeting of the Annual Meeting of ..." venue.
@inproceedings{DBLP:conf/emnlp/JoulinBMJG18,
  author    = {Armand Joulin and
               Piotr Bojanowski and
               Tomas Mikolov and
               Herv{\'{e}} J{\'{e}}gou and
               Edouard Grave},
  title     = {Loss in Translation: Learning Bilingual Word Mapping with a Retrieval
               Criterion},
  pages     = {2979--2984},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2018}
}
% Chen and Cardie (2018): unsupervised multilingual word embeddings.
% EMNLP paper (see key); replaces the garbled "Annual Meeting of the Annual Meeting of ..." venue.
@inproceedings{DBLP:conf/emnlp/ChenC18,
  author    = {Xilun Chen and
               Claire Cardie},
  title     = {Unsupervised Multilingual Word Embeddings},
  pages     = {261--270},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2018}
}
% (Repeated definition of DBLP:conf/naacl/MohiuddinJ19 removed: the key is already
% defined earlier in this file, and BibTeX reports a redefinition as "Repeated entry".)
% Taitelbaum, Chechik and Goldberger (2019): multilingual word translation via auxiliary languages.
% EMNLP paper (see key); replaces the garbled "Annual Meeting of the Annual Meeting of ..." venue.
@inproceedings{DBLP:conf/emnlp/TaitelbaumCG19,
  author    = {Hagai Taitelbaum and
               Gal Chechik and
               Jacob Goldberger},
  title     = {Multilingual word translation using auxiliary languages},
  pages     = {1330--1335},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2019}
}
% Yang et al. (2019): MAAM, a morphology-aware alignment model for bilingual lexicon induction.
% Venue string de-duplicated (was "Annual Meeting of the Annual Meeting of ...").
@inproceedings{DBLP:conf/acl/YangLCLS19,
  author    = {Pengcheng Yang and
               Fuli Luo and
               Peng Chen and
               Tianyu Liu and
               Xu Sun},
  title     = {{MAAM:} {A} Morphology-Aware Alignment Model for Unsupervised Bilingual
               Lexicon Induction},
  pages     = {3190--3196},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2019}
}
% Ormazabal et al. (2019): limitations of cross-lingual word embedding mappings.
% Venue string de-duplicated (was "Annual Meeting of the Annual Meeting of ...").
@inproceedings{DBLP:conf/acl/OrmazabalALSA19,
  author    = {Aitor Ormazabal and
               Mikel Artetxe and
               Gorka Labaka and
               Aitor Soroa and
               Eneko Agirre},
  title     = {Analyzing the Limitations of Cross-lingual Word Embedding Mappings},
  pages     = {4990--4995},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2019}
}
% Artetxe, Labaka and Agirre (2019): bilingual lexicon induction via unsupervised MT.
% Venue string de-duplicated (was "Annual Meeting of the Annual Meeting of ...").
@inproceedings{DBLP:conf/acl/ArtetxeLA19a,
  author    = {Mikel Artetxe and
               Gorka Labaka and
               Eneko Agirre},
  title     = {Bilingual Lexicon Induction through Unsupervised Machine Translation},
  pages     = {5002--5007},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2019}
}
% Vulic, Korhonen and Glavas (2020): post-processing monolingual vector spaces to improve
% bilingual lexicon induction.
% Venue string de-duplicated (was "Annual Meeting of the Annual Meeting of ...").
% NOTE(review): the key suggests a workshop (rep4nlp) -- confirm the intended venue name.
@inproceedings{DBLP:conf/rep4nlp/VulicKG20,
  author    = {Ivan Vulic and
               Anna Korhonen and
               Goran Glavas},
  title     = {Improving Bilingual Lexicon Induction with Unsupervised Post-Processing
               of Monolingual Word Vector Spaces},
  pages     = {45--54},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2020}
}

% Hartmann, Kementchedjhieva and Sogaard (2018): instability of GAN-based alignment of
% word vector spaces.
% NOTE(review): this article entry carries no journal field; standard BibTeX styles warn
% about that -- confirm where the paper appeared and add the venue.
@article{hartmann2018empirical,
  author = {Hartmann, Mareike and Kementchedjhieva, Yova and S{\o}gaard, Anders},
  title  = {Empirical observations on the instability of aligning word vector spaces with GANs},
  year   = {2018}
}
% Kementchedjhieva, Hartmann and Sogaard (2019): misleading benchmarks for bilingual
% dictionary induction.
% EMNLP paper (see key); venue string matches the other EMNLP entries in this file.
@inproceedings{DBLP:conf/emnlp/Kementchedjhieva19,
  author    = {Yova Kementchedjhieva and
               Mareike Hartmann and
               Anders S{\o}gaard},
  title     = {Lost in Evaluation: Misleading Benchmarks for Bilingual Dictionary
               Induction},
  pages     = {3334--3339},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2019}
}
% Hartmann, Kementchedjhieva and Sogaard (2019): step-by-step comparison of unsupervised
% word translation methods.
% The venue is stored in `publisher`, as in the other conference entries of this file.
@inproceedings{DBLP:conf/nips/HartmannKS19,
  author    = {Mareike Hartmann and
               Yova Kementchedjhieva and
               Anders S{\o}gaard},
  title     = {Comparing Unsupervised Word Translation Methods Step by Step},
  pages     = {6031--6041},
  publisher = {Conference on Neural Information Processing Systems},
  year      = {2019}
}

% Hartmann, Kementchedjhieva and Sogaard (2018): why unsupervised alignment of English
% embeddings from different algorithms is hard.
% EMNLP paper (see key); replaces the garbled "Annual Meeting of the Annual Meeting of ..." venue.
@inproceedings{DBLP:conf/emnlp/HartmannKS18,
  author    = {Mareike Hartmann and
               Yova Kementchedjhieva and
               Anders S{\o}gaard},
  title     = {Why is unsupervised alignment of English embeddings from different
               algorithms so hard?},
  pages     = {582--586},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2018}
}

% Vulic et al. (2019): whether fully unsupervised cross-lingual embeddings are needed.
% EMNLP paper (see key); replaces the garbled "Annual Meeting of the Annual Meeting of ..." venue.
@inproceedings{DBLP:conf/emnlp/VulicGRK19,
  author    = {Ivan Vulic and
               Goran Glavas and
               Roi Reichart and
               Anna Korhonen},
  title     = {Do We Really Need Fully Unsupervised Cross-Lingual Embeddings?},
  pages     = {4406--4417},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2019}
}
% (Repeated definition of DBLP:conf/emnlp/JoulinBMJG18 removed: the key is already
% defined earlier in this file, and BibTeX reports a redefinition as "Repeated entry".)
% Sogaard, Ruder and Vulic (2018): limitations of unsupervised bilingual dictionary induction.
% Venue string de-duplicated (was "Annual Meeting of the Annual Meeting of ...").
@inproceedings{DBLP:conf/acl/SogaardVR18,
  author    = {Anders S{\o}gaard and
               Sebastian Ruder and
               Ivan Vulic},
  title     = {On the Limitations of Unsupervised Bilingual Dictionary Induction},
  pages     = {778--788},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2018}
}
% Heyman et al. (2019): unsupervised multilingual word embeddings with incremental hubs.
% NAACL paper (see key); replaces the garbled "Annual Meeting of the Annual Meeting of ..." venue.
@inproceedings{DBLP:conf/naacl/HeymanVVM19,
  author    = {Geert Heyman and
               Bregt Verreet and
               Ivan Vulic and
               Marie-Francine Moens},
  title     = {Learning Unsupervised Multilingual Word Embeddings with Incremental
               Multilingual Hubs},
  pages     = {1890--1902},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  year      = {2019}
}
% Dabre, Chu and Kunchukuttan (2019): survey of multilingual neural machine translation.
% An article entry needs a journal; the preprint identifier follows the form used by the
% dabre2019brief entry in this file (same authors and year).
% NOTE(review): presumably the same arXiv preprint as dabre2019brief -- confirm the identifier.
@article{2019ADabre,
  title   = {A Survey of Multilingual Neural Machine Translation},
  author  = {Dabre, Raj and Chu, Chenhui and Kunchukuttan, Anoop},
  journal = {arXiv preprint arXiv:1905.05395},
  year    = {2019},
}
% Zoph and Knight (2016): multi-source neural translation.
@inproceedings{DBLP:conf/naacl/ZophK16,
  author = {Zoph, Barret and Knight, Kevin},
  title = {Multi-Source Neural Translation},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  year = {2016},
  pages = {30--34},
}
% Firat, Cho and Bengio (2016): multi-way multilingual neural MT with shared attention.
% NAACL paper (see key); venue string matches the other NAACL entries in this file.
@inproceedings{DBLP:conf/naacl/FiratCB16,
  author    = {Orhan Firat and
               Kyunghyun Cho and
               Yoshua Bengio},
  title     = {Multi-Way, Multilingual Neural Machine Translation with a Shared Attention
               Mechanism},
  pages     = {866--875},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  year      = {2016}
}
% Johnson et al. (2017): Google's multilingual neural MT system and zero-shot translation.
% Journal name stored in full so the bibliography style decides on abbreviation.
@article{DBLP:journals/tacl/JohnsonSLKWCTVW17,
  author    = {Melvin Johnson and
               Mike Schuster and
               Quoc V. Le and
               Maxim Krikun and
               Yonghui Wu and
               Zhifeng Chen and
               Nikhil Thorat and
               Fernanda B. Vi{\'{e}}gas and
               Martin Wattenberg and
               Greg Corrado and
               Macduff Hughes and
               Jeffrey Dean},
  title     = {Google's Multilingual Neural Machine Translation System: Enabling
               Zero-Shot Translation},
  journal   = {Transactions of the Association for Computational Linguistics},
  volume    = {5},
  pages     = {339--351},
  year      = {2017}
}
% Kim et al. (2019): pivot-based transfer learning for neural MT between non-English languages.
% EMNLP paper (see key); venue string matches the other EMNLP entries in this file.
@inproceedings{DBLP:conf/emnlp/KimPPKN19,
  author    = {Yunsu Kim and
               Petre Petrov and
               Pavel Petrushkov and
               Shahram Khadivi and
               Hermann Ney},
  title     = {Pivot-based Transfer Learning for Neural Machine Translation between
               Non-English Languages},
  pages     = {866--876},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2019}
}
% Chen et al. (2017): teacher-student framework for zero-resource neural MT.
@inproceedings{DBLP:conf/acl/ChenLCL17,
  author = {Chen, Yun and Liu, Yang and Cheng, Yong and Li, Victor O. K.},
  title = {A Teacher-Student Framework for Zero-Resource Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2017},
  pages = {1925--1935},
}
% Wu and Wang (2007): pivot-language approach for phrase-based statistical MT.
% Journal name stored in full so the bibliography style decides on abbreviation.
@article{DBLP:journals/mt/WuW07,
  author    = {Hua Wu and
               Haifeng Wang},
  title     = {Pivot language approach for phrase-based statistical machine translation},
  journal   = {Machine Translation},
  volume    = {21},
  number    = {3},
  pages     = {165--181},
  year      = {2007}
}
% Bakhshaei, Khadivi and Riahi (2010): Farsi-German statistical MT through a bridge language.
% The venue is a symposium (stored in `publisher`, per this file's convention), so the entry
% type is inproceedings rather than article; the language names in the title are brace-protected.
% NOTE(review): the page range is identical to the one in DBLP:journals/mt/WuW07 and looks
% copied -- verify against the proceedings.
@inproceedings{Farsi2010somayeh,
  author    = {Somayeh Bakhshaei and Shahram Khadivi and Noushin Riahi},
  title     = {{Farsi-German} statistical machine translation through bridge language},
  publisher = {International Telecommunications Symposium},
  pages     = {165--181},
  year      = {2010}
}
% Zahabi, Bakhshaei and Khadivi (2013): context vectors for bridge-language MT.
@inproceedings{DBLP:conf/acl/ZahabiBK13,
  author = {Zahabi, Samira Tofighi and Bakhshaei, Somayeh and Khadivi, Shahram},
  title = {Using Context Vectors in Improving a Machine Translation System with Bridge Language},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2013},
  pages = {318--322},
}
% Zhu et al. (2014): pivot-based statistical MT using co-occurrence counts of phrase pairs.
% `publisher` holds the venue name, consistent with the other EMNLP entries in this file.
@inproceedings{DBLP:conf/emnlp/ZhuHWZWZ14,
  author    = {Xiaoning Zhu and
               Zhongjun He and
               Hua Wu and
               Conghui Zhu and
               Haifeng Wang and
               Tiejun Zhao},
  title     = {Improving Pivot-Based Statistical Machine Translation by Pivoting
               the Co-occurrence Count of Phrase Pairs},
  pages     = {1665--1675},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2014}
}
% Miura et al. (2015): pivot translation that remembers the pivot.
@inproceedings{DBLP:conf/acl/MiuraNSTN15,
  author = {Miura, Akiva and Neubig, Graham and Sakti, Sakriani and Toda, Tomoki and Nakamura, Satoshi},
  title = {Improving Pivot Translation by Remembering the Pivot},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2015},
  pages = {573--577},
}
% Cohn and Lapata (2007): machine translation by triangulation over multi-parallel corpora.
@inproceedings{DBLP:conf/acl/CohnL07,
  author = {Cohn, Trevor and Lapata, Mirella},
  title = {Machine Translation by Triangulation: Making Effective Use of Multi-Parallel Corpora},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2007},
}
% (Repeated definition of DBLP:journals/mt/WuW07 removed: the key is already
% defined earlier in this file, and BibTeX reports a redefinition as "Repeated entry".)
% Wu and Wang (2009): revisiting the pivot-language approach for MT.
@inproceedings{DBLP:conf/acl/WuW09,
  author = {Wu, Hua and Wang, Haifeng},
  title = {Revisiting Pivot Language Approach for Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2009},
  pages = {154--162},
}
% Cheng et al. (2016): neural machine translation with pivot languages (CoRR preprint).
@article{DBLP:journals/corr/ChengLYSX16,
  author = {Cheng, Yong and Liu, Yang and Yang, Qian and Sun, Maosong and Xu, Wei},
  title = {Neural Machine Translation with Pivot Languages},
  journal = {CoRR},
  year = {2016},
  volume = {abs/1611.04928},
}
% Kauers et al. (2002): interlingua-based statistical machine translation.
% The key places this paper at a speech conference (interspeech); "International Symposium
% on Computer Architecture" was a wrong expansion of the acronym ISCA.
% NOTE(review): confirm the exact proceedings name for the 2002 edition.
@inproceedings{DBLP:conf/interspeech/KauersVFW02,
  author    = {Manuel Kauers and
               Stephan Vogel and
               Christian F{\"{u}}gen and
               Alex Waibel},
  title     = {Interlingua based statistical machine translation},
  publisher = {International Conference on Spoken Language Processing},
  year      = {2002}
}
% De Gispert and Marino (2006): Catalan-English statistical MT bridged through Spanish.
@inproceedings{de2006catalan,
  author    = {De Gispert, Adri{\`a} and Marino, Jose B},
  title     = {Catalan-English statistical machine translation without parallel corpus: bridging through Spanish},
  booktitle = {Proc. of 5th International Conference on Language Resources and Evaluation (LREC)},
  year      = {2006},
  pages     = {65--68}
}
% Utiyama and Isahara (2007): comparison of pivot methods for phrase-based statistical MT.
% NAACL paper (see key); venue string matches the other NAACL entries in this file.
@inproceedings{DBLP:conf/naacl/UtiyamaI07,
  author    = {Masao Utiyama and
               Hitoshi Isahara},
  title     = {A Comparison of Pivot Methods for Phrase-Based Statistical Machine
               Translation},
  pages     = {484--491},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  year      = {2007}
}
% Costa-jussa, Henriquez Q. and Banchs (2011): pivot combinations for scarce-resource translation.
% IJCNLP paper (see key), not the ACL annual meeting.
% The second author is written in "Last, First" form so that BibTeX keeps the compound
% surname together instead of treating only the trailing "Q." as the last name.
@inproceedings{DBLP:conf/ijcnlp/Costa-JussaHB11,
  author    = {Costa-juss{\`{a}}, Marta R. and
               Henr{\'{\i}}quez Q., Carlos A. and
               Banchs, Rafael E.},
  title     = {Enhancing scarce-resource language translation through pivot combinations},
  pages     = {1361--1365},
  publisher = {International Joint Conference on Natural Language Processing},
  year      = {2011}
}
% Hinton, Vinyals and Dean (2015): knowledge distillation in neural networks (CoRR preprint).
@article{DBLP:journals/corr/HintonVD15,
  author = {Hinton, Geoffrey E. and Vinyals, Oriol and Dean, Jeffrey},
  title = {Distilling the Knowledge in a Neural Network},
  journal = {CoRR},
  year = {2015},
  volume = {abs/1503.02531},
}
% Gu et al. (2018): meta-learning for low-resource neural MT (arXiv preprint).
% The last author's initials are separated ("O. K.") so BibTeX treats them as two
% initials, matching the spelling used for the same author elsewhere in this file.
@article{gu2018meta,
  title   = {Meta-learning for low-resource neural machine translation},
  author  = {Gu, Jiatao and Wang, Yong and Chen, Yun and Cho, Kyunghyun and Li, Victor O. K.},
  journal = {arXiv preprint arXiv:1808.08437},
  year    = {2018}
}
% Gu et al. (2018): universal neural MT for extremely low-resource languages.
% NAACL paper (see key); venue string matches the other NAACL entries in this file.
@inproceedings{DBLP:conf/naacl/GuHDL18,
  author    = {Jiatao Gu and
               Hany Hassan and
               Jacob Devlin and
               Victor O. K. Li},
  title     = {Universal Neural Machine Translation for Extremely Low Resource Languages},
  pages     = {344--354},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  year      = {2018}
}
% Finn, Abbeel and Levine (2017): model-agnostic meta-learning.
@inproceedings{DBLP:conf/icml/FinnAL17,
  author = {Finn, Chelsea and Abbeel, Pieter and Levine, Sergey},
  title = {Model-Agnostic Meta-Learning for Fast Adaptation of Deep Networks},
  publisher = {International Conference on Machine Learning},
  series = {Proceedings of Machine Learning Research},
  volume = {70},
  year = {2017},
  pages = {1126--1135},
}
% Dong et al. (2015): multi-task learning for translation into multiple languages.
@inproceedings{DBLP:conf/acl/DongWHYW15,
  author = {Dong, Daxiang and Wu, Hua and He, Wei and Yu, Dianhai and Wang, Haifeng},
  title = {Multi-Task Learning for Multiple Language Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2015},
  pages = {1723--1732},
}
% Lee, Cho and Hofmann (2017): fully character-level neural MT without explicit segmentation.
% Journal name stored in full so the bibliography style decides on abbreviation.
@article{DBLP:journals/tacl/LeeCH17,
  author    = {Jason Lee and
               Kyunghyun Cho and
               Thomas Hofmann},
  title     = {Fully Character-Level Neural Machine Translation without Explicit
               Segmentation},
  journal   = {Transactions of the Association for Computational Linguistics},
  volume    = {5},
  pages     = {365--378},
  year      = {2017}
}
% Rikters, Pinnis and Krislauks (2018): multilingual NMT for less-resourced, morphologically
% rich languages.
@inproceedings{DBLP:conf/lrec/RiktersPK18,
  author = {Rikters, Matiss and Pinnis, Marcis and Krislauks, Rihards},
  title = {Training and Adapting Multilingual {NMT} for Less-resourced and Morphologically Rich Languages},
  publisher = {European Language Resources Association},
  year = {2018},
}
% Pan and Yang (2010): survey on transfer learning.
% Journal name stored in full so the bibliography style decides on abbreviation.
@article{DBLP:journals/tkde/PanY10,
  author    = {Sinno Jialin Pan and
               Qiang Yang},
  title     = {A Survey on Transfer Learning},
  journal   = {{IEEE} Transactions on Knowledge and Data Engineering},
  volume    = {22},
  number    = {10},
  pages     = {1345--1359},
  year      = {2010}
}
% (Repeated definition of DBLP:journals/tacl/JohnsonSLKWCTVW17 removed: the key is already
% defined earlier in this file, and BibTeX reports a redefinition as "Repeated entry".)
% Olivas et al. (2009): handbook of research on machine learning applications and trends.
@book{2009Handbook,
  author    = {Olivas, Emilio Soria and Guerrero, Jose David Martin and Sober, Marcelino Martinez and Benedito, Jose Rafael Magdalena and Lopez, Antonio Jose Serrano},
  title     = {Handbook Of Research On Machine Learning Applications and Trends: Algorithms, Methods and Techniques - 2 Volumes},
  publisher = {Information Science Reference - Imprint of: IGI Publishing},
  year      = {2009}
}
% Pan (2014): book chapter on transfer learning.
@incollection{DBLP:books/crc/aggarwal14/Pan14,
  author = {Pan, Sinno Jialin},
  title = {Transfer Learning},
  booktitle = {Data Classification: Algorithms and Applications},
  publisher = {{CRC} Press},
  year = {2014},
  pages = {537--570},
}
% Tan et al. (2019): multilingual neural MT with knowledge distillation.
% `publisher` holds the venue name, consistent with the other ICLR entries in this file
% (previously the hosting site OpenReview.net).
@inproceedings{DBLP:conf/iclr/TanRHQZL19,
  author    = {Xu Tan and
               Yi Ren and
               Di He and
               Tao Qin and
               Zhou Zhao and
               Tie-Yan Liu},
  title     = {Multilingual Neural Machine Translation with Knowledge Distillation},
  publisher = {International Conference on Learning Representations},
  year      = {2019}
}
% Platanios et al. (2018): contextual parameter generation for universal neural MT (arXiv preprint).
@article{platanios2018contextual,
  author  = {Platanios, Emmanouil Antonios and Sachan, Mrinmaya and Neubig, Graham and Mitchell, Tom},
  title   = {Contextual parameter generation for universal neural machine translation},
  journal = {arXiv preprint arXiv:1808.08493},
  year    = {2018}
}
% Ji et al. (2020): cross-lingual pre-training based transfer for zero-shot neural MT.
% The same title and authors also appear in this file under the key DBLP:conf/aaai/JiZDZCL20.
@inproceedings{ji2020cross,
  author    = {Ji, Baijun and Zhang, Zhirui and Duan, Xiangyu and Zhang, Min and Chen, Boxing and Luo, Weihua},
  title     = {Cross-Lingual Pre-Training Based Transfer for Zero-Shot Neural Machine Translation},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  year      = {2020},
  volume    = {34},
  number    = {01},
  pages     = {115--122}
}
% Kocmi and Bojar (2018): trivial transfer learning for low-resource neural MT.
@inproceedings{DBLP:conf/wmt/KocmiB18,
  author = {Kocmi, Tom and Bojar, Ondrej},
  title = {Trivial Transfer Learning for Low-Resource Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2018},
  pages = {244--252},
}
% Zhang et al. (2020): improving massively multilingual neural MT and zero-shot translation.
@inproceedings{DBLP:conf/acl/ZhangWTS20,
  author = {Zhang, Biao and Williams, Philip and Titov, Ivan and Sennrich, Rico},
  title = {Improving Massively Multilingual Neural Machine Translation and Zero-Shot Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2020},
  pages = {1628--1639},
}
% Paul et al. (2009): importance of pivot language selection for statistical MT.
% NAACL paper (see key); venue string matches the other NAACL entries in this file.
@inproceedings{DBLP:conf/naacl/PaulYSN09,
  author    = {Michael Paul and
               Hirofumi Yamamoto and
               Eiichiro Sumita and
               Satoshi Nakamura},
  title     = {On the Importance of Pivot Language Selection for Statistical Machine
               Translation},
  pages     = {221--224},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  year      = {2009}
}
% Dabre, Chu and Kunchukuttan (2019): brief survey of multilingual neural MT (arXiv preprint).
@article{dabre2019brief,
  author  = {Dabre, Raj and Chu, Chenhui and Kunchukuttan, Anoop},
  title   = {A Brief Survey of Multilingual Neural Machine Translation},
  journal = {arXiv preprint arXiv:1905.05395},
  year    = {2019}
}
% Dabre, Chu and Kunchukuttan (2020): survey of multilingual neural MT (journal version).
@article{dabre2020survey,
  author  = {Dabre, Raj and Chu, Chenhui and Kunchukuttan, Anoop},
  title   = {A survey of multilingual neural machine translation},
  journal = {ACM Computing Surveys (CSUR)},
  year    = {2020},
  volume  = {53},
  number  = {5},
  pages   = {1--38}
}
% (Repeated definition of DBLP:conf/emnlp/VulicGRK19 removed: the key is already
% defined earlier in this file, and BibTeX reports a redefinition as "Repeated entry".)
% Mikolov, Le and Sutskever (2013): exploiting cross-language similarities for MT (CoRR preprint).
@article{DBLP:journals/corr/MikolovLS13,
  author = {Mikolov, Tomas and Le, Quoc V. and Sutskever, Ilya},
  title = {Exploiting Similarities among Languages for Machine Translation},
  journal = {CoRR},
  year = {2013},
  volume = {abs/1309.4168},
}
% (Repeated definition of DBLP:journals/corr/MikolovLS13 removed: the identical entry is
% defined immediately above, and BibTeX reports a redefinition as "Repeated entry".)

% (Repeated definition of DBLP:conf/emnlp/XuYOW18 removed: the key is already
% defined earlier in this file, and BibTeX reports a redefinition as "Repeated entry".)
% Lample et al. (2018): word translation without parallel data.
@inproceedings{DBLP:conf/iclr/LampleCRDJ18,
  author = {Lample, Guillaume and Conneau, Alexis and Ranzato, Marc'Aurelio and Denoyer, Ludovic and J{\'{e}}gou, Herv{\'{e}}},
  title = {Word translation without parallel data},
  publisher = {International Conference on Learning Representations},
  year = {2018},
}

% Zhang et al. (2017): earth mover's distance minimization for bilingual lexicon induction.
@inproceedings{DBLP:conf/emnlp/ZhangLLS17,
  author = {Zhang, Meng and Liu, Yang and Luan, Huanbo and Sun, Maosong},
  title = {Earth Mover's Distance Minimization for Unsupervised Bilingual Lexicon Induction},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2017},
  pages = {1934--1945},
}
% (Repeated definition of DBLP:conf/naacl/MohiuddinJ19 removed: the key is already
% defined earlier in this file, and BibTeX reports a redefinition as "Repeated entry".)


% Artetxe, Labaka and Agirre (2018): unsupervised statistical machine translation.
@inproceedings{DBLP:conf/emnlp/ArtetxeLA18,
  author = {Artetxe, Mikel and Labaka, Gorka and Agirre, Eneko},
  title = {Unsupervised Statistical Machine Translation},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2018},
  pages = {3632--3642},
}

% (Repeated definition of DBLP:journals/tacl/LeeCH17 removed: the key is already
% defined earlier in this file, and BibTeX reports a redefinition as "Repeated entry".)
% (Repeated definition of DBLP:conf/naacl/FiratCB16 removed: the key is already
% defined earlier in this file, and BibTeX reports a redefinition as "Repeated entry".)
% Ha, Niehues and Waibel (2016): multilingual neural MT with a universal encoder and
% decoder (CoRR preprint).
@article{DBLP:journals/corr/HaNW16,
  author = {Ha, Thanh-Le and Niehues, Jan and Waibel, Alexander H.},
  title = {Toward Multilingual Neural Machine Translation with Universal Encoder and Decoder},
  journal = {CoRR},
  year = {2016},
  volume = {abs/1611.04798},
}
% (Repeated definition of DBLP:journals/tacl/JohnsonSLKWCTVW17 removed: the key is already
% defined earlier in this file, and BibTeX reports a redefinition as "Repeated entry".)
% Blackwood, Ballesteros and Ward (2018): multilingual neural MT with task-specific attention.
@inproceedings{DBLP:conf/coling/BlackwoodBW18,
  author = {Blackwood, Graeme W. and Ballesteros, Miguel and Ward, Todd},
  title = {Multilingual Neural Machine Translation with Task-Specific Attention},
  publisher = {International Conference on Computational Linguistics},
  year = {2018},
  pages = {3112--3122},
}
@inproceedings{DBLP:conf/wmt/SachanN18,
  title = {Parameter Sharing Methods for Multilingual Self-Attentional Translation Models},
  author = {Devendra Singh Sachan and Graham Neubig},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {261--271},
  year = {2018},
}
@inproceedings{DBLP:conf/wmt/LuKLBZS18,
  title = {A neural interlingua for multilingual machine translation},
  author = {Yichao Lu and Phillip Keung and Faisal Ladhak
            and Vikas Bhardwaj and Shaonan Zhang and Jason Sun},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {84--92},
  year = {2018},
}
@inproceedings{DBLP:conf/acl/WangZZZXZ19,
  title = {A Compact and Language-Sensitive Multilingual Translation Method},
  author = {Yining Wang and Long Zhou and Jiajun Zhang
            and Feifei Zhai and Jingfang Xu and Chengqing Zong},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {1213--1223},
  year = {2019},
}
@inproceedings{DBLP:conf/iclr/WangPAN19,
  title = {Multilingual Neural Machine Translation With Soft Decoupled Encoding},
  author = {Xinyi Wang and Hieu Pham and Philip Arthur and Graham Neubig},
  publisher = {International Conference on Learning Representations},
  year = {2019},
}
@inproceedings{DBLP:conf/emnlp/TanCHXQL19,
  title = {Multilingual Neural Machine Translation with Language Clustering},
  author = {Xu Tan and Jiale Chen and Di He and Yingce Xia and Tao Qin and Tie-Yan Liu},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages = {963--973},
  year = {2019},
}

% Same paper as DBLP:conf/naacl/Al-ShedivatP19; this key is kept for existing citations.
@inproceedings{2019Consistency,
  author    = {Al-Shedivat, Maruan and Parikh, Ankur P.},
  title     = {Consistency by Agreement in Zero-Shot Neural Machine Translation},
  pages     = {1184--1197},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  year      = {2019},
}
@article{DBLP:journals/corr/abs-1903-07091,
  title = {The Missing Ingredient in Zero-Shot Neural Machine Translation},
  author = {Naveen Arivazhagan and Ankur Bapna and Orhan Firat
            and Roee Aharoni and Melvin Johnson and Wolfgang Macherey},
  journal = {CoRR},
  volume = {abs/1903.07091},
  year = {2019},
}
@inproceedings{DBLP:conf/naacl/Al-ShedivatP19,
  title = {Consistency by Agreement in Zero-Shot Neural Machine Translation},
  author = {Maruan Al-Shedivat and Ankur P. Parikh},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  pages = {1184--1197},
  year = {2019},
}
% arXiv preprint of the paper also listed as DBLP:conf/emnlp/FiratSAYC16.
@article{firat2016zero,
  author  = {Firat, Orhan and Sankaran, Baskaran and Al-Onaizan, Yaser and Yarman-Vural, Fatos T. and Cho, Kyunghyun},
  title   = {Zero-resource translation with multi-lingual neural machine translation},
  journal = {arXiv preprint arXiv:1606.04164},
  year    = {2016},
}
@article{DBLP:journals/corr/abs-1805-10338,
  title = {Zero-Shot Dual Machine Translation},
  author = {Lierni Sestorain and Massimiliano Ciaramita and Christian Buck and Thomas Hofmann},
  journal = {CoRR},
  volume = {abs/1805.10338},
  year = {2018},
}
@inproceedings{DBLP:conf/acl/GuWCL19,
  title = {Improved Zero-shot Neural Machine Translation via Ignoring Spurious Correlations},
  author = {Jiatao Gu and Yong Wang and Kyunghyun Cho and Victor O. K. Li},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {1258--1268},
  year = {2019},
}
@inproceedings{DBLP:conf/emnlp/FiratSAYC16,
  title = {Zero-Resource Translation with Multi-Lingual Neural Machine Translation},
  author = {Orhan Firat and Baskaran Sankaran and Yaser Al-Onaizan
            and Fatos T. Yarman-Vural and Kyunghyun Cho},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages = {268--277},
  year = {2016},
}
@inproceedings{DBLP:conf/emnlp/CurreyH19,
  title = {Zero-Resource Neural Machine Translation with Monolingual Pivot Data},
  author = {Anna Currey and Kenneth Heafield},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages = {99--107},
  year = {2019},
}
@inproceedings{DBLP:conf/acl/FadaeeBM17a,
  title = {Data Augmentation for Low-Resource Neural Machine Translation},
  author = {Marzieh Fadaee and Arianna Bisazza and Christof Monz},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {567--573},
  year = {2017},
}
@inproceedings{DBLP:conf/emnlp/WangPDN18,
  title = {SwitchOut: an Efficient Data Augmentation Algorithm for Neural Machine Translation},
  author = {Xinyi Wang and Hieu Pham and Zihang Dai and Graham Neubig},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages = {856--861},
  year = {2018},
}
@inproceedings{DBLP:conf/emnlp/MartonCR09,
  author    = {Yuval Marton and
               Chris Callison-Burch and
               Philip Resnik},
  title     = {Improved Statistical Machine Translation Using Monolingually-Derived
               Paraphrases},
  pages     = {381--390},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2009}
}
@inproceedings{DBLP:conf/eacl/LapataSM17,
  title = {Paraphrasing Revisited with Neural Machine Translation},
  author = {Jonathan Mallinson and Rico Sennrich and Mirella Lapata},
  publisher = {European Association of Computational Linguistics},
  pages = {881--893},
  year = {2017},
}
@inproceedings{DBLP:conf/aclnmt/ImamuraFS18,
  title = {Enhancement of Encoder and Attention Using Target Monolingual Corpora in Neural Machine Translation},
  author = {Kenji Imamura and Atsushi Fujita and Eiichiro Sumita},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {55--63},
  year = {2018},
}
@inproceedings{DBLP:conf/icml/VincentLBM08,
  author    = {Pascal Vincent and
               Hugo Larochelle and
               Yoshua Bengio and
               Pierre-Antoine Manzagol},
  title     = {Extracting and composing robust features with denoising autoencoders},
  series    = {{ACM} International Conference Proceeding Series},
  volume    = {307},
  pages     = {1096--1103},
  publisher = {International Conference on Machine Learning},
  year      = {2008}
}
@article{DBLP:journals/ipm/FarhanTAJATT20,
  author    = {Wael Farhan and
               Bashar Talafha and
               Analle Abuammar and
               Ruba Jaikat and
               Mahmoud Al-Ayyoub and
               Ahmad Bisher Tarakji and
               Anas Toma},
  title     = {Unsupervised dialectal neural machine translation},
  journal   = {Information Processing \& Management},
  volume    = {57},
  number    = {3},
  pages     = {102181},
  year      = {2020}
}
@inproceedings{DBLP:conf/iclr/LampleCDR18,
  title = {Unsupervised Machine Translation Using Monolingual Corpora Only},
  author = {Guillaume Lample and Alexis Conneau and Ludovic Denoyer and Marc'Aurelio Ranzato},
  publisher = {International Conference on Learning Representations},
  year = {2018},
}
@article{DBLP:journals/coling/BhagatH13,
  title = {What Is a Paraphrase?},
  author = {Rahul Bhagat and Eduard H. Hovy},
  journal = {Computational Linguistics},
  volume = {39},
  number = {3},
  pages = {463--472},
  year = {2013},
}
@article{2010Generating,
  author  = {Madnani, Nitin and Dorr, Bonnie J.},
  title   = {Generating Phrasal and Sentential Paraphrases: A Survey of Data-Driven Methods},
  journal = {Computational Linguistics},
  volume  = {36},
  number  = {3},
  pages   = {341--387},
  year    = {2010},
}
@inproceedings{DBLP:conf/wmt/GuoH19,
  title = {Meteor++ 2.0: Adopt Syntactic Level Paraphrase Knowledge into Machine Translation Evaluation},
  author = {Yinuo Guo and Junfeng Hu},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {501--506},
  year = {2019},
}
@inproceedings{DBLP:conf/acl/ZhouSW19,
  title = {Paraphrases as Foreign Languages in Multilingual Neural Machine Translation},
  author = {Zhong Zhou and Matthias Sperber and Alexander H. Waibel},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {113--122},
  year = {2019},
}
% Repeated definition of DBLP:conf/eacl/LapataSM17 dropped here: the key is
% already defined earlier in this file and BibTeX rejects repeated entries.
@inproceedings{yasuda2008method,
  author    = {Yasuda, Keiji and Sumita, Eiichiro},
  title     = {Method for building sentence-aligned corpus from wikipedia},
  pages     = {263--268},
  publisher = {2008 AAAI Workshop on Wikipedia and Artificial Intelligence},
  year      = {2008},
}
@article{2005Improving,
  author  = {Munteanu, Dragos Stefan and Marcu, Daniel},
  title   = {Improving Machine Translation Performance by Exploiting Non-Parallel Corpora},
  journal = {Computational Linguistics},
  volume  = {31},
  number  = {4},
  pages   = {477--504},
  year    = {2005},
}
@inproceedings{DBLP:conf/naacl/SmithQT10,
  author    = {Jason R. Smith and
               Chris Quirk and
               Kristina Toutanova},
  title     = {Extracting Parallel Sentences from Comparable Corpora using Document
               Level Alignment},
  pages     = {403--411},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  year      = {2010}
}
@article{DBLP:journals/jair/RuderVS19,
  author    = {Sebastian Ruder and
               Ivan Vuli{\'{c}} and
               Anders S{\o}gaard},
  title     = {A Survey of Cross-lingual Word Embedding Models},
  journal   = {Journal of Artificial Intelligence Research},
  volume    = {65},
  pages     = {569--631},
  year      = {2019}
}
@inproceedings{DBLP:conf/acl/TuLLLL16,
  title = {Modeling Coverage for Neural Machine Translation},
  author = {Zhaopeng Tu and Zhengdong Lu and Yang Liu and Xiaohua Liu and Hang Li},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2016},
}
@article{DBLP:journals/tacl/TuLLLL17,
  author    = {Zhaopeng Tu and
               Yang Liu and
               Zhengdong Lu and
               Xiaohua Liu and
               Hang Li},
  title     = {Context Gates for Neural Machine Translation},
  journal   = {Transactions of the Association for Computational Linguistics},
  volume    = {5},
  pages     = {87--99},
  year      = {2017}
}
@inproceedings{DBLP:conf/wmt/WangCJYCLSWY17,
  title = {Sogou Neural Machine Translation Systems for {WMT17}},
  author = {Yuguang Wang and Shanbo Cheng and Liyang Jiang
            and Jiajun Yang and Wei Chen and Muze Li
            and Lin Shi and Yanfeng Wang and Hongtao Yang},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {410--415},
  year = {2017},
}
@article{ng2019facebook,
  author  = {Ng, Nathan and Yee, Kyra and Baevski, Alexei and Ott, Myle and Auli, Michael and Edunov, Sergey},
  title   = {Facebook {FAIR}'s {WMT19} News Translation Task Submission},
  journal = {arXiv preprint arXiv:1907.06616},
  year    = {2019},
}
@inproceedings{DBLP:conf/wmt/WangLLJZLLXZ18,
  title = {The NiuTrans Machine Translation System for {WMT18}},
  author = {Qiang Wang and Bei Li and Jiqiang Liu
            and Bojian Jiang and Zheyang Zhang and Yinqiao Li
            and Ye Lin and Tong Xiao and Jingbo Zhu},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {528--534},
  year = {2018},
}
@inproceedings{DBLP:conf/wmt/LiLXLLLWZXWFCLL19,
  title = {The NiuTrans Machine Translation Systems for {WMT19}},
  author = {Bei Li and Yinqiao Li and Chen Xu and Ye Lin
            and Jiqiang Liu and Hui Liu and Ziyang Wang and Yuhao Zhang
            and Nuo Xu and Zeyang Wang and Kai Feng and Hexuan Chen
            and Tengbo Liu and Yanyang Li and Qiang Wang and Tong Xiao
            and Jingbo Zhu},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {257--266},
  year = {2019},
}
@inproceedings{DBLP:conf/nips/DaiL15,
  title = {Semi-supervised Sequence Learning},
  author = {Andrew M. Dai and Quoc V. Le},
  publisher = {Conference and Workshop on Neural Information Processing Systems},
  pages = {3079--3087},
  year = {2015},
}
@article{DBLP:journals/corr/abs-1802-05365,
  title = {Deep contextualized word representations},
  author = {Matthew E. Peters and Mark Neumann and Mohit Iyyer and Matt Gardner
            and Christopher Clark and Kenton Lee and Luke Zettlemoyer},
  journal = {CoRR},
  volume = {abs/1802.05365},
  year = {2018},
}
@inproceedings{DBLP:conf/icml/CollobertW08,
  title = {A unified architecture for natural language processing: deep neural networks with multitask learning},
  author = {Ronan Collobert and Jason Weston},
  publisher = {International Conference on Machine Learning},
  volume = {307},
  pages = {160--167},
  year = {2008},
}
@inproceedings{DBLP:conf/aclwat/NeishiSTIYT17,
  title = {A Bag of Useful Tricks for Practical Neural Machine Translation: Embedding Layer Initialization and Large Batch Size},
  author = {Masato Neishi and Jin Sakuma and Satoshi Tohda
            and Shonosuke Ishiwatari and Naoki Yoshinaga and Masashi Toyoda},
  publisher = {Asian Federation of Natural Language Processing},
  pages = {99--109},
  year = {2017},
}
@inproceedings{2018When,
  author    = {Qi, Ye and Sachan, Devendra Singh and Felix, Matthieu and Padmanabhan, Sarguna Janani and Neubig, Graham},
  title     = {When and Why are Pre-trained Word Embeddings Useful for Neural Machine Translation?},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  year      = {2018},
}
@inproceedings{DBLP:conf/acl/PetersABP17,
  title = {Semi-supervised sequence tagging with bidirectional language models},
  author = {Matthew E. Peters and Waleed Ammar and Chandra Bhagavatula and Russell Power},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {1756--1765},
  year = {2017},
}
@inproceedings{DBLP:conf/naacl/PetersNIGCLZ18,
  author    = {Matthew E. Peters and
               Mark Neumann and
               Mohit Iyyer and
               Matt Gardner and
               Christopher Clark and
               Kenton Lee and
               Luke Zettlemoyer},
  title     = {Deep Contextualized Word Representations},
  pages     = {2227--2237},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  year      = {2018}
}
% Repeated definition of DBLP:conf/naacl/PetersNIGCLZ18 dropped here: the key is
% already defined earlier in this file and BibTeX rejects repeated entries.
% Another repeated definition of DBLP:conf/naacl/PetersNIGCLZ18 dropped here: the
% key is already defined earlier in this file and BibTeX rejects repeated entries.
@inproceedings{DBLP:conf/emnlp/ClinchantJN19,
  author    = {St{\'{e}}phane Clinchant and
               Kweon Woo Jung and
               Vassilina Nikoulina},
  title     = {On the use of {BERT} for Neural Machine Translation},
  pages     = {108--117},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2019}
}
@inproceedings{DBLP:conf/emnlp/ImamuraS19,
  author    = {Kenji Imamura and
               Eiichiro Sumita},
  title     = {Recycling a Pre-trained {BERT} Encoder for Neural Machine Translation},
  pages     = {23--31},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2019}
}
@inproceedings{DBLP:conf/naacl/EdunovBA19,
  title = {Pre-trained language model representations for language generation},
  author = {Sergey Edunov and Alexei Baevski and Michael Auli},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  pages = {4052--4059},
  year = {2019},
}
@article{DBLP:journals/corr/abs-1908-06259,
  title = {Hard but Robust, Easy but Sensitive: How Encoder and Decoder Perform in Neural Machine Translation},
  author = {Tianyu He and Xu Tan and Tao Qin},
  journal = {CoRR},
  volume = {abs/1908.06259},
  year = {2019},
}
@inproceedings{DBLP:conf/aaai/YangW0Z00020,
  title = {Towards Making the Most of {BERT} in Neural Machine Translation},
  author = {Jiacheng Yang and Mingxuan Wang and Hao Zhou and Chengqi Zhao
            and Weinan Zhang and Yong Yu and Lei Li},
  publisher = {AAAI Conference on Artificial Intelligence},
  pages = {9378--9385},
  year = {2020},
}
@inproceedings{DBLP:conf/acl/LewisLGGMLSZ20,
  title = {{BART:} Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension},
  author = {Mike Lewis and Yinhan Liu and Naman Goyal and Marjan Ghazvininejad
            and Abdelrahman Mohamed and Omer Levy and Veselin Stoyanov and Luke Zettlemoyer},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {7871--7880},
  year = {2020},
}
@inproceedings{DBLP:conf/emnlp/QiYGLDCZ020,
  author    = {Weizhen Qi and
               Yu Yan and
               Yeyun Gong and
               Dayiheng Liu and
               Nan Duan and
               Jiusheng Chen and
               Ruofei Zhang and
               Ming Zhou},
  title     = {ProphetNet: Predicting Future N-gram for Sequence-to-Sequence Pre-training},
  pages     = {2401--2410},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2020}
}
@incollection{DBLP:books/sp/98/Caruana98,
  author    = {Rich Caruana},
  title     = {Multitask Learning},
  booktitle = {Learning to Learn},
  pages     = {95--133},
  publisher = {Springer},
  year      = {1998}
}
@article{liu2019multi,
  author  = {Liu, Xiaodong and He, Pengcheng and Chen, Weizhu and Gao, Jianfeng},
  title   = {Multi-task deep neural networks for natural language understanding},
  journal = {arXiv preprint arXiv:1901.11504},
  year    = {2019},
}
@inproceedings{DBLP:journals/corr/LuongLSVK15,
  title = {Multi-task Sequence to Sequence Learning},
  author = {Minh-Thang Luong and Quoc V. Le and Ilya Sutskever
            and Oriol Vinyals and Lukasz Kaiser},
  publisher = {International Conference on Learning Representations},
  year = {2016},
}
@inproceedings{DBLP:conf/emnlp/ZhangZ16,
  title = {Exploiting Source-side Monolingual Data in Neural Machine Translation},
  author = {Jiajun Zhang and Chengqing Zong},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages = {1535--1545},
  year = {2016},
}
% Repeated definition of DBLP:journals/tacl/JohnsonSLKWCTVW17 dropped here: the key
% is already defined earlier in this file and BibTeX rejects repeated entries.
@article{DBLP:journals/csl/GulcehreFXCB17,
  author    = {{\c{C}}aglar G{\"{u}}l{\c{c}}ehre and
               Orhan Firat and
               Kelvin Xu and
               Kyunghyun Cho and
               Yoshua Bengio},
  title     = {On integrating a language model into neural machine translation},
  journal   = {Computer Speech \& Language},
  volume    = {45},
  pages     = {137--148},
  year      = {2017}
}
@inproceedings{DBLP:conf/wmt/StahlbergCS18,
  title = {Simple Fusion: Return of the Language Model},
  author = {Felix Stahlberg and James Cross and Veselin Stoyanov},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {204--211},
  year = {2018},
}
@inproceedings{DBLP:conf/iccv/SunSSG17,
  title = {Revisiting Unreasonable Effectiveness of Data in Deep Learning Era},
  author = {Chen Sun and Abhinav Shrivastava and Saurabh Singh and Abhinav Gupta},
  publisher = {{IEEE} Computer Society},
  pages = {843--852},
  year = {2017},
}
@inproceedings{DBLP:conf/acl/DuhNST13,
  title = {Adaptation Data Selection using Neural Language Models: Experiments in Machine Translation},
  author = {Kevin Duh and Graham Neubig and Katsuhito Sudoh and Hajime Tsukada},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {678--683},
  year = {2013},
}
@inproceedings{DBLP:conf/wmt/FosterK07,
  title = {Mixture-Model Adaptation for {SMT}},
  author = {George F. Foster and Roland Kuhn},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {128--135},
  year = {2007},
}
@inproceedings{DBLP:conf/iwslt/BisazzaRF11,
  author    = {Arianna Bisazza and
               Nick Ruiz and
               Marcello Federico},
  title     = {Fill-up versus interpolation methods for phrase-based {SMT} adaptation},
  pages     = {136--143},
  publisher = {International Workshop on Spoken Language Translation},
  year      = {2011}
}
@inproceedings{niehues2012detailed,
  author    = {Niehues, Jan and Waibel, Alex},
  title     = {Detailed analysis of different strategies for phrase table adaptation in {SMT}},
  publisher = {Association for Machine Translation in the Americas},
  year      = {2012},
}
@inproceedings{DBLP:conf/acl/SennrichSA13,
  title = {A Multi-Domain Translation Model Framework for Statistical Machine Translation},
  author = {Rico Sennrich and Holger Schwenk and Walid Aransa},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {832--840},
  year = {2013},
}
@article{joty2015using,
  author  = {Durrani, Nadir and Sajjad, Hassan and Joty, Shafiq and Abdelali, Ahmed and Vogel, Stephan},
  title   = {Using joint models for domain adaptation in statistical machine translation},
  journal = {Proceedings of MT Summit XV},
  pages   = {117},
  year    = {2015},
}
@article{imamura2016multi,
  author  = {Imamura, Kenji and Sumita, Eiichiro},
  title   = {Multi-domain adaptation for statistical machine translation based on feature augmentation},
  journal = {Association for Machine Translation in the Americas},
  pages   = {79},
  year    = {2016},
}
@inproceedings{DBLP:conf/emnlp/MatsoukasRZ09,
  title = {Discriminative Corpus Weight Estimation for Machine Translation},
  author = {Spyros Matsoukas and Antti-Veikko I. Rosti and Bing Zhang},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages = {708--717},
  year = {2009},
}
@inproceedings{DBLP:conf/emnlp/FosterGK10,
  title = {Discriminative Instance Weighting for Domain Adaptation in Statistical Machine Translation},
  author = {George F. Foster and Cyril Goutte and Roland Kuhn},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages = {451--459},
  year = {2010},
}
% NOTE(review): the journal string looks like a scraping artifact; confirm the
% actual venue of this paper before relying on it.
@article{shah2012general,
  author  = {Shah, Kashif and Barrault, Lo{\"{\i}}c and Schwenk, Holger},
  title   = {A general framework to weight heterogeneous parallel data for model adaptation in statistical machine translation},
  journal = {MT Summit, Octobre},
  year    = {2012},
}
@inproceedings{DBLP:conf/iwslt/MansourN12,
  author    = {Saab Mansour and
               Hermann Ney},
  title     = {A simple and effective weighted phrase extraction for machine translation
               adaptation},
  pages     = {193--200},
  publisher = {International Workshop on Spoken Language Translation},
  year      = {2012}
}
@inproceedings{DBLP:conf/cncl/ZhouCZ15,
  title = {Domain Adaptation for {SMT} Using Sentence Weight},
  author = {Xinpeng Zhou and Hailong Cao and Tiejun Zhao},
  publisher = {Springer},
  volume = {9427},
  pages = {153--163},
  year = {2015},
}
@inproceedings{DBLP:conf/lrec/EckVW04,
  title = {Language Model Adaptation for Statistical Machine Translation Based on Information Retrieval},
  author = {Matthias Eck and Stephan Vogel and Alex Waibel},
  publisher = {European Language Resources Association},
  year = {2004},
}
@inproceedings{DBLP:conf/coling/ZhaoEV04,
  title = {Language Model Adaptation for Statistical Machine Translation via Structured Query Models},
  author = {Bing Zhao and Matthias Eck and Stephan Vogel},
  publisher = {International Conference on Computational Linguistics},
  year = {2004},
}
@inproceedings{DBLP:conf/acl/MooreL10,
  title = {Intelligent Selection of Language Model Training Data},
  author = {Robert C. Moore and William D. Lewis},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {220--224},
  year = {2010},
}
% Repeated definition of DBLP:conf/acl/DuhNST13 dropped here: the key is
% already defined earlier in this file and BibTeX rejects repeated entries.
@inproceedings{DBLP:conf/coling/HoangS14,
  title = {Latent Domain Translation Models in Mix-of-Domains Haystack},
  author = {Cuong Hoang and Khalil Sima'an},
  publisher = {International Conference on Computational Linguistics},
  pages = {1928--1939},
  year = {2014},
}
% Repeated definition of joty2015using dropped here: the key is already
% defined earlier in this file and BibTeX rejects repeated entries.
@inproceedings{chen2016bilingual,
  author    = {Chen, Boxing and Kuhn, Roland and Foster, George and Cherry, Colin and Huang, Fei},
  title     = {Bilingual methods for adaptive training data selection for machine translation},
  booktitle = {Association for Machine Translation in the Americas},
  pages     = {93--103},
  year      = {2016},
}
@inproceedings{DBLP:conf/iwslt/Ueffing06,
  author    = {Nicola Ueffing},
  title     = {Using monolingual source-language data to improve {MT} performance},
  pages     = {174--181},
  publisher = {International Workshop on Spoken Language Translation},
  year      = {2006}
}
@inproceedings{DBLP:conf/coling/WuWZ08,
  title = {Domain Adaptation for Statistical Machine Translation with Domain Dictionary and Monolingual Corpora},
  author = {Hua Wu and Haifeng Wang and Chengqing Zong},
  pages = {993--1000},
  publisher = {International Conference on Computational Linguistics},
  year = {2008},
}
@inproceedings{DBLP:conf/iwslt/Schwenk08,
  author    = {Holger Schwenk},
  title     = {Investigations on large-scale lightly-supervised training for statistical
               machine translation},
  pages     = {182--189},
  publisher = {International Workshop on Spoken Language Translation},
  year      = {2008}
}
@inproceedings{DBLP:conf/wmt/BertoldiF09,
  title = {Domain Adaptation for Statistical Machine Translation with Monolingual Resources},
  author = {Nicola Bertoldi and Marcello Federico},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {182--189},
  year = {2009},
}
@inproceedings{DBLP:conf/wmt/LambertSSA11,
  title = {Investigations on Translation Model Adaptation Using Monolingual Data},
  author = {Patrik Lambert and Holger Schwenk and Christophe Servan and Sadaf Abdul-Rauf},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {284--293},
  year = {2011},
}
@inproceedings{DBLP:conf/eacl/Sennrich12,
  author    = {Rico Sennrich},
  title     = {Perplexity Minimization for Translation Model Domain Adaptation in
               Statistical Machine Translation},
  pages     = {539--549},
  publisher = {Conference of the European Chapter of the Association for Computational Linguistics},
  year      = {2012}
}
% Repeated definition of DBLP:conf/wmt/FosterK07 dropped here: the key is
% already defined earlier in this file and BibTeX rejects repeated entries.
% Repeated definition of DBLP:conf/emnlp/MatsoukasRZ09 dropped here: the key is
% already defined earlier in this file and BibTeX rejects repeated entries.
@inproceedings{DBLP:conf/emnlp/FosterGK10,
  author    = {Foster, George F. and Goutte, Cyril and Kuhn, Roland},
  title     = {Discriminative Instance Weighting for Domain Adaptation in Statistical Machine Translation},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {451--459},
  year      = {2010}
}
@inproceedings{DBLP:conf/wmt/ShahBS10,
  author    = {Shah, Kashif and Barrault, Lo{\"{\i}}c and Schwenk, Holger},
  title     = {Translation Model Adaptation by Resampling},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {392--399},
  year      = {2010}
}
@inproceedings{rousseau2011lium,
  author    = {Rousseau, Anthony and Bougares, Fethi and Del{\'e}glise, Paul and Schwenk, Holger and Est{\`e}ve, Yannick},
  title     = {{LIUM}'s systems for the {IWSLT} 2011 Speech Translation Tasks},
  publisher = {International Workshop on Spoken Language Translation},
  year      = {2011}
}
@inproceedings{DBLP:conf/lrec/EckVW04,
  author    = {Eck, Matthias and Vogel, Stephan and Waibel, Alex},
  title     = {Language Model Adaptation for Statistical Machine Translation Based on Information Retrieval},
  publisher = {European Language Resources Association},
  year      = {2004}
}
@inproceedings{DBLP:conf/coling/ZhaoEV04,
  author    = {Zhao, Bing and Eck, Matthias and Vogel, Stephan},
  title     = {Language Model Adaptation for Statistical Machine Translation via Structured Query Models},
  publisher = {International Conference on Computational Linguistics},
  year      = {2004}
}
@inproceedings{moore2010intelligent,
  author    = {Moore, Robert C. and Lewis, William},
  title     = {Intelligent selection of language model training data},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {220--224},
  year      = {2010}
}
@inproceedings{DBLP:conf/acl/UtiyamaI03,
  author    = {Utiyama, Masao and Isahara, Hitoshi},
  title     = {Reliable Measures for Aligning Japanese-English News Articles and Sentences},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {72--79},
  year      = {2003}
}
@inproceedings{DBLP:conf/acl/MarieF17,
  author    = {Marie, Benjamin and Fujita, Atsushi},
  title     = {Efficient Extraction of Pseudo-Parallel Sentences from Raw Monolingual Data Using Word Embeddings},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {392--398},
  year      = {2017}
}
@inproceedings{DBLP:conf/emnlp/WangZLUS14,
  author    = {Wang, Rui and Zhao, Hai and Lu, Bao-Liang and Utiyama, Masao and Sumita, Eiichiro},
  title     = {Neural Network Based Bilingual Language Model Growing for Statistical Machine Translation},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {189--195},
  year      = {2014}
}
@inproceedings{DBLP:conf/coling/WangZLUS16,
  author    = {Wang, Rui and Zhao, Hai and Lu, Bao-Liang and Utiyama, Masao and Sumita, Eiichiro},
  title     = {Connecting Phrase based Statistical Machine Translation Adaptation},
  publisher = {International Conference on Computational Linguistics},
  pages     = {3135--3145},
  year      = {2016}
}
@phdthesis{chu2015integrated,
  author = {Chu, Chenhui},
  title  = {Integrated parallel data extraction from comparable corpora for statistical machine translation},
  school = {Kyoto University},
  year   = {2015}
}
@article{DBLP:journals/tit/Scudder65a,
  author    = {Scudder, III, H. J.},
  title     = {Probability of error of some adaptive pattern-recognition machines},
  journal   = {{IEEE} Transactions on Information Theory},
  volume    = {11},
  number    = {3},
  pages     = {363--371},
  year      = {1965}
}
@inproceedings{DBLP:conf/coling/ChuW18,
  author    = {Chu, Chenhui and Wang, Rui},
  title     = {A Survey of Domain Adaptation for Neural Machine Translation},
  publisher = {International Conference on Computational Linguistics},
  pages     = {1304--1319},
  year      = {2018}
}
@article{DBLP:journals/corr/abs-1708-08712,
  author    = {Sajjad, Hassan and Durrani, Nadir and Dalvi, Fahim and Belinkov, Yonatan and Vogel, Stephan},
  title     = {Neural Machine Translation Training in a Multi-Domain Scenario},
  journal   = {CoRR},
  volume    = {abs/1708.08712},
  year      = {2017}
}
@inproceedings{DBLP:conf/acl/WangTNYCP20,
  author    = {Wang, Wei and Tian, Ye and Ngiam, Jiquan and Yang, Yinfei and Caswell, Isaac and Parekh, Zarana},
  title     = {Learning a Multi-Domain Curriculum for Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {7711--7723},
  year      = {2020}
}
@inproceedings{DBLP:conf/acl/JiangLWZ20,
  author    = {Jiang, Haoming and Liang, Chen and Wang, Chong and Zhao, Tuo},
  title     = {Multi-Domain Neural Machine Translation with Word-Level Adaptive Layer-wise Domain Mixing},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {1823--1834},
  year      = {2020}
}
@inproceedings{DBLP:conf/emnlp/AxelrodHG11,
  author    = {Axelrod, Amittai and He, Xiaodong and Gao, Jianfeng},
  title     = {Domain Adaptation via Pseudo In-Domain Data Selection},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {355--362},
  year      = {2011}
}
@inproceedings{DBLP:conf/icdm/Remus12,
  author    = {Remus, Robert},
  title     = {Domain Adaptation Using Domain Similarity- and Domain Complexity-Based Instance Selection for Cross-Domain Sentiment Analysis},
  publisher = {{IEEE} Computer Society},
  pages     = {717--723},
  year      = {2012}
}
@inproceedings{DBLP:conf/acl/WangFUS17,
  author    = {Wang, Rui and Finch, Andrew M. and Utiyama, Masao and Sumita, Eiichiro},
  title     = {Sentence Embedding for Neural Machine Translation Domain Adaptation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {560--566},
  year      = {2017}
}
@inproceedings{DBLP:conf/acl/HuXNC19,
  author    = {Hu, Junjie and Xia, Mengzhou and Neubig, Graham and Carbonell, Jaime G.},
  title     = {Domain Adaptation of Neural Machine Translation by Lexicon Induction},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {2989--3001},
  year      = {2019}
}
@inproceedings{2019Non,
  author    = {Bapna, Ankur and Firat, Orhan},
  title     = {Non-Parametric Adaptation for Neural Machine Translation},
  booktitle = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  pages     = {1921--1931},
  year      = {2019}
}
@inproceedings{britz2017effective,
  author    = {Britz, Denny and Le, Quoc and Pryzant, Reid},
  title     = {Effective domain mixing for neural machine translation},
  booktitle = {Proceedings of the Second Conference on Machine Translation},
  pages     = {118--126},
  year      = {2017}
}
@inproceedings{DBLP:conf/ranlp/KobusCS17,
  author    = {Kobus, Catherine and Crego, Josep Maria and Senellart, Jean},
  title     = {Domain Control for Neural Machine Translation},
  publisher = {International Conference Recent Advances in Natural Language Processing},
  pages     = {372--378},
  year      = {2017}
}
@inproceedings{DBLP:conf/emnlp/WangULCS17,
  author    = {Wang, Rui and Utiyama, Masao and Liu, Lemao and Chen, Kehai and Sumita, Eiichiro},
  title     = {Instance Weighting for Neural Machine Translation Domain Adaptation},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {1482--1488},
  year      = {2017}
}
@inproceedings{DBLP:conf/aclnmt/ChenCFL17,
  author    = {Chen, Boxing and Cherry, Colin and Foster, George F. and Larkin, Samuel},
  title     = {Cost Weighting for Neural Machine Translation Domain Adaptation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {40--46},
  year      = {2017}
}
@article{DBLP:journals/corr/abs-1906-03129,
  author    = {Yan, Shen and Dahlmann, Leonard and Petrushkov, Pavel and Hewavitharana, Sanjika and Khadivi, Shahram},
  title     = {Word-based Domain Adaptation for Neural Machine Translation},
  journal   = {CoRR},
  volume    = {abs/1906.03129},
  year      = {2019}
}
@article{dakwale2017finetuning,
  author    = {Dakwale, Praveen and Monz, Christof},
  title     = {Finetuning for neural machine translation with limited degradation across in-and out-of-domain data},
  journal   = {Proceedings of the XVI Machine Translation Summit},
  volume    = {117},
  year      = {2017}
}
@inproceedings{DBLP:conf/emnlp/ZengLSGLYL19,
  author    = {Zeng, Jiali and Liu, Yang and Su, Jinsong and Ge, Yubin and Lu, Yaojie and Yin, Yongjing and Luo, Jiebo},
  title     = {Iterative Dual Domain Adaptation for Neural Machine Translation},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {845--855},
  year      = {2019}
}
@article{barone2017regularization,
  author    = {Miceli Barone, Antonio Valerio and Haddow, Barry and Germann, Ulrich and Sennrich, Rico},
  title     = {Regularization techniques for fine-tuning in neural machine translation},
  journal   = {CoRR},
  volume    = {abs/1707.09920},
  year      = {2017}
}
@inproceedings{DBLP:conf/acl/SaundersB20,
  author    = {Saunders, Danielle and Byrne, Bill},
  title     = {Reducing Gender Bias in Neural Machine Translation as a Domain Adaptation Problem},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {7724--7736},
  year      = {2020}
}
@inproceedings{khayrallah2017neural,
  author    = {Khayrallah, Huda and Kumar, Gaurav and Duh, Kevin and Post, Matt and Koehn, Philipp},
  title     = {Neural lattice search for domain adaptation in machine translation},
  booktitle = {Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 2: Short Papers)},
  pages     = {20--25},
  year      = {2017}
}
@inproceedings{DBLP:conf/emnlp/DouWHN19,
  author    = {Dou, Zi-Yi and Wang, Xinyi and Hu, Junjie and Neubig, Graham},
  title     = {Domain Differential Adaptation for Neural Machine Translation},
  publisher = {Association for Computational Linguistics},
  pages     = {59--69},
  year      = {2019}
}
@article{DBLP:journals/corr/FreitagA16,
  author    = {Freitag, Markus and Al-Onaizan, Yaser},
  title     = {Fast Domain Adaptation for Neural Machine Translation},
  journal   = {CoRR},
  volume    = {abs/1612.06897},
  year      = {2016}
}
@inproceedings{DBLP:conf/acl/SaundersSGB19,
  author    = {Saunders, Danielle and Stahlberg, Felix and de Gispert, Adri{\`{a}} and Byrne, Bill},
  title     = {Domain Adaptive Inference for Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {222--228},
  year      = {2019}
}
@inproceedings{DBLP:conf/wmt/BritzLP17,
  author    = {Britz, Denny and Le, Quoc V. and Pryzant, Reid},
  title     = {Effective Domain Mixing for Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {118--126},
  year      = {2017}
}
@article{DBLP:journals/ibmrd/Luhn58,
  author    = {Luhn, Hans Peter},
  title     = {The Automatic Creation of Literature Abstracts},
  journal   = {{IBM} J. Res. Dev.},
  year      = {1958},
  volume    = {2},
  number    = {2},
  pages     = {159--165}
}
@inproceedings{DBLP:conf/emnlp/DomhanH17,
  author    = {Domhan, Tobias and Hieber, Felix},
  title     = {Using Target-side Monolingual Data for Neural Machine Translation through Multi-task Learning},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {1500--1505},
  year      = {2017}
}
@inproceedings{DBLP:conf/naacl/SimianerWD19,
  author    = {Simianer, Patrick and Wuebker, Joern and DeNero, John},
  title     = {Measuring Immediate Adaptation Performance for Neural Machine Translation},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  pages     = {2038--2046},
  year      = {2019}
}
% Key DBLP:journals/corr/abs-1906-03129 (Yan et al., 2019) is already defined earlier in this chapter;
% the verbatim second copy was dropped here because BibTeX rejects repeated entry keys.
@inproceedings{DBLP:conf/emnlp/WeesBM17,
  author    = {van der Wees, Marlies and Bisazza, Arianna and Monz, Christof},
  title     = {Dynamic Data Selection for Neural Machine Translation},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {1400--1410},
  year      = {2017}
}
@inproceedings{DBLP:conf/naacl/ZhangSKMCD19,
  author    = {Zhang, Xuan and Shapiro, Pamela and Kumar, Gaurav and McNamee, Paul and Carpuat, Marine and Duh, Kevin},
  title     = {Curriculum Learning for Domain Adaptation in Neural Machine Translation},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  pages     = {1903--1915},
  year      = {2019}
}
@inproceedings{DBLP:conf/acl/ChuDK17,
  author    = {Chu, Chenhui and Dabre, Raj and Kurohashi, Sadao},
  title     = {An Empirical Comparison of Domain Adaptation Methods for Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {385--391},
  year      = {2017}
}
% Key DBLP:conf/emnlp/DomhanH17 (Domhan and Hieber, 2017) is already defined earlier in this chapter;
% the verbatim second copy was dropped here because BibTeX rejects repeated entry keys.
@inproceedings{DBLP:conf/naacl/BapnaF19,
  author    = {Bapna, Ankur and Firat, Orhan},
  title     = {Non-Parametric Adaptation for Neural Machine Translation},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  pages     = {1921--1931},
  year      = {2019}
}


@article{DBLP:journals/corr/abs-2010-11125,
  author    = {Fan, Angela and Bhosale, Shruti and Schwenk, Holger and Ma, Zhiyi and
               El-Kishky, Ahmed and Goyal, Siddharth and Baines, Mandeep and Celebi, Onur and
               Wenzek, Guillaume and Chaudhary, Vishrav and Goyal, Naman and Birch, Tom and
               Liptchinsky, Vitaliy and Edunov, Sergey and Grave, Edouard and Auli, Michael and
               Joulin, Armand},
  title     = {Beyond English-Centric Multilingual Machine Translation},
  journal   = {CoRR},
  volume    = {abs/2010.11125},
  year      = {2020}
}
@inproceedings{DBLP:conf/emnlp/LinPWQFZL20,
  author    = {Lin, Zehui and Pan, Xiao and Wang, Mingxuan and Qiu, Xipeng and Feng, Jiangtao and Zhou, Hao and Li, Lei},
  title     = {Pre-training Multilingual Neural Machine Translation by Leveraging Alignment Information},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {2649--2663},
  year      = {2020}
}

@inproceedings{DBLP:conf/emnlp/ZhuH07,
  author    = {Zhu, Jingbo and Hovy, Eduard H.},
  title     = {Active Learning for Word Sense Disambiguation with Methods for Addressing the Class Imbalance Problem},
  editor    = {Eisner, Jason},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {783--790},
  year      = {2007}
}
@inproceedings{DBLP:conf/eacl/NegriTFBF17,
  author    = {Farajian, M. Amin and Turchi, Marco and Negri, Matteo and Bertoldi, Nicola and Federico, Marcello},
  title     = {Neural vs. Phrase-Based Machine Translation in a Multi-Domain Scenario},
  publisher = {European Association of Computational Linguistics},
  pages     = {280--284},
  year      = {2017}
}
@inproceedings{DBLP:conf/aaai/Zhang0LZC18,
  author    = {Zhang, Zhirui and Liu, Shujie and Li, Mu and Zhou, Ming and Chen, Enhong},
  title     = {Joint Training for Neural Machine Translation Models with Monolingual Data},
  publisher = {AAAI Conference on Artificial Intelligence},
  pages     = {555--562},
  year      = {2018}
}


@inproceedings{DBLP:conf/wmt/SunJXHWW19,
  author    = {Sun, Meng and Jiang, Bojian and Xiong, Hao and He, Zhongjun and Wu, Hua and Wang, Haifeng},
  title     = {Baidu Neural Machine Translation Systems for {WMT19}},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {374--381},
  year      = {2019}
}


@inproceedings{DBLP:conf/acl/SuHC19,
  author    = {Su, Shang-Yu and Huang, Chao-Wei and Chen, Yun-Nung},
  title     = {Dual Supervised Learning for Natural Language Understanding and Generation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {5472--5477},
  year      = {2019}
}


@article{DBLP:journals/ejasmp/RadzikowskiNWY19,
  author    = {Radzikowski, Kacper and Nowak, Robert and Wang, Le and Yoshie, Osamu},
  title     = {Dual supervised learning for non-native speech recognition},
  journal   = {{EURASIP} J. Audio Speech Music. Process.},
  year      = {2019},
  volume    = {2019},
  pages     = {3}
}
@incollection{qin2020dual,
  author    = {Qin, Tao},
  title     = {Dual Learning for Machine Translation and Beyond},
  booktitle = {Dual Learning},
  publisher = {Springer},
  pages     = {49--72},
  year      = {2020}
}
@inproceedings{DBLP:conf/iccv/YiZTG17,
  author    = {Yi, Zili and Zhang, Hao (Richard) and Tan, Ping and Gong, Minglun},
  title     = {{DualGAN}: Unsupervised Dual Learning for Image-to-Image Translation},
  publisher = {{IEEE} Computer Society},
  pages     = {2868--2876},
  year      = {2017}
}
@article{DBLP:journals/access/DuRZH20,
  author    = {Du, Liang and Ren, Xin and Zhou, Peng and Hu, Zhiguo},
  title     = {Unsupervised Dual Learning for Feature and Instance Selection},
  journal   = {{IEEE} Access},
  year      = {2020},
  volume    = {8},
  pages     = {170248--170260}
}
%%%%% chapter 16------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% chapter 17------------------------------------------------------

%%%%% chapter 17------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% chapter 18------------------------------------------------------

%%%%% chapter 18------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% chapter appendix-A------------------------------------------------------
@inproceedings{Tong2012NiuTrans,
  author    = {Xiao, Tong and Zhu, Jingbo and Zhang, Hao and Li, Qiang},
  title     = {{NiuTrans}: An Open Source Toolkit for Phrase-based and Syntax-based Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {19--24},
  year      = {2012}
}

@inproceedings{Li2010Joshua,
  author    = {Li, Zhifei and Callison-Burch, Chris and Dyer, Chris and Khudanpur, Sanjeev and
               Schwartz, Lane and Thornton, Wren N. G. and Weese, Jonathan and Zaidan, Omar},
  title     = {Joshua: An Open Source Toolkit for Parsing-Based Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {135--139},
  year      = {2009}
}

@inproceedings{iglesias2009hierarchical,
  author    = {Iglesias, Gonzalo and de Gispert, Adri{\`{a}} and Banga, Eduardo Rodr{\'{\i}}guez and Byrne, William J.},
  title     = {Hierarchical Phrase-Based Translation with Weighted Finite State Transducers},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  pages     = {433--441},
  year      = {2009}
}

@inproceedings{dyer2010cdec,
  author    = {Dyer, Chris and Lopez, Adam and Ganitkevitch, Juri and Weese, Jonathan and
               T{\"{u}}re, Ferhan and Blunsom, Phil and Setiawan, Hendra and Eidelman, Vladimir and
               Resnik, Philip},
  title     = {cdec: {A} Decoder, Alignment, and Learning Framework for Finite-State and Context-Free Translation Models},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {7--12},
  year      = {2010}
}

@inproceedings{Cer2010Phrasal,
  author    = {Cer, Daniel M. and Galley, Michel and Jurafsky, Daniel and Manning, Christopher D.},
  title     = {Phrasal: {A} Statistical Machine Translation Toolkit for Exploring New Model Features},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  pages     = {9--12},
  year      = {2010}
}

@article{vilar2012jane,
  author    = {Vilar, David and Stein, Daniel and Huck, Matthias and Ney, Hermann},
  title     = {Jane: an advanced freely available hierarchical machine translation toolkit},
  journal   = {Machine Translation},
  volume    = {26},
  number    = {3},
  pages     = {197--216},
  year      = {2012}
}

@inproceedings{DBLP:conf/naacl/DyerCS13,
  author    = {Dyer, Chris and Chahuneau, Victor and Smith, Noah A.},
  title     = {A Simple, Fast, and Effective Reparameterization of {IBM} Model 2},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  pages     = {644--648},
  year      = {2013}
}

@article{al2016theano,
  author    = {Rami Al-Rfou and
               Guillaume Alain and
               Amjad Almahairi and
               Christof Angerm{\"{u}}ller and
               Dzmitry Bahdanau and
               Nicolas Ballas and
               Fr{\'{e}}d{\'{e}}ric Bastien and
               Justin Bayer and
               Anatoly Belikov and
               Alexander Belopolsky and
               Yoshua Bengio and
               Arnaud Bergeron and
               James Bergstra and
               Valentin Bisson and
               Josh Bleecher Snyder and
               Nicolas Bouchard and
               Nicolas Boulanger-Lewandowski and
               Xavier Bouthillier and
               Alexandre de Br{\'{e}}bisson and
               Olivier Breuleux and
               Pierre Luc Carrier and
               Kyunghyun Cho and
               Jan Chorowski and
               Paul F. Christiano and
               Tim Cooijmans and
               Marc-Alexandre C{\^{o}}t{\'{e}} and
               Myriam C{\^{o}}t{\'{e}} and
               Aaron C. Courville and
               Yann N. Dauphin and
               Olivier Delalleau and
               Julien Demouth and
               Guillaume Desjardins and
               Sander Dieleman and
               Laurent Dinh and
               Melanie Ducoffe and
               Vincent Dumoulin and
               Samira Ebrahimi Kahou and
               Dumitru Erhan and
               Ziye Fan and
               Orhan Firat and
               Mathieu Germain and
               Xavier Glorot and
               Ian J. Goodfellow and
               Matthew Graham and
               {\c{C}}aglar G{\"{u}}l{\c{c}}ehre and
               Philippe Hamel and
               Iban Harlouchet and
               Jean-Philippe Heng and
               Bal{\'{a}}zs Hidasi and
               Sina Honari and
               Arjun Jain and
               S{\'{e}}bastien Jean and
               Kai Jia and
               Mikhail Korobov and
               Vivek Kulkarni and
               Alex Lamb and
               Pascal Lamblin and
               Eric Larsen and
               C{\'{e}}sar Laurent and
               Sean Lee and
               Simon Lefran{\c{c}}ois and
               Simon Lemieux and
               Nicholas L{\'{e}}onard and
               Zhouhan Lin and
               Jesse A. Livezey and
               Cory Lorenz and
               Jeremiah Lowin and
               Qianli Ma and
               Pierre-Antoine Manzagol and
               Olivier Mastropietro and
               Robert McGibbon and
               Roland Memisevic and
               Bart van Merri{\"{e}}nboer and
               Vincent Michalski and
               Mehdi Mirza and
               Alberto Orlandi and
               Christopher Joseph Pal and
               Razvan Pascanu and
               Mohammad Pezeshki and
               Colin Raffel and
               Daniel Renshaw and
               Matthew Rocklin and
               Adriana Romero and
               Markus Roth and
               Peter Sadowski and
               John Salvatier and
               Fran{\c{c}}ois Savard and
               Jan Schl{\"{u}}ter and
               John Schulman and
               Gabriel Schwartz and
               Iulian Vlad Serban and
               Dmitriy Serdyuk and
               Samira Shabanian and
               {\'{E}}tienne Simon and
               Sigurd Spieckermann and
               S. Ramana Subramanyam and
               Jakub Sygnowski and
               J{\'{e}}r{\'{e}}mie Tanguay and
               Gijs van Tulder and
               Joseph P. Turian and
               Sebastian Urban and
               Pascal Vincent and
               Francesco Visin and
               Harm de Vries and
               David Warde-Farley and
               Dustin J. Webb and
               Matthew Willson and
               Kelvin Xu and
               Lijun Xue and
               Li Yao and
               Saizheng Zhang and
               Ying Zhang},
  title     = {Theano: {A} {Python} framework for fast computation of mathematical
               expressions},
  journal   = {CoRR},
  volume    = {abs/1605.02688},
  year      = {2016}
}

@inproceedings{DBLP:journals/corr/SennrichFCBHHJL17,
  author    = {Sennrich, Rico and Firat, Orhan and Cho, Kyunghyun and Haddow, Barry and
               Birch, Alexandra and Hitschler, Julian and Junczys-Dowmunt, Marcin and
               L{\"{a}}ubli, Samuel and Miceli Barone, Antonio Valerio and Mokry, Jozef and
               Nadejde, Maria},
  title     = {Nematus: a Toolkit for Neural Machine Translation},
  publisher = {European Association of Computational Linguistics},
  pages     = {65--68},
  year      = {2017}
}

@inproceedings{Koehn2007Moses,
  author    = {Koehn, Philipp and Hoang, Hieu and Birch, Alexandra and Callison-Burch, Chris and
               Federico, Marcello and Bertoldi, Nicola and Cowan, Brooke and Shen, Wade and
               Moran, Christine and Zens, Richard and Dyer, Chris and Bojar, Ondrej and
               Constantin, Alexandra and Herbst, Evan},
  title     = {Moses: Open Source Toolkit for Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {177--180},
  year      = {2007}
}

@inproceedings{zollmann2007the,
  author    = {Zollmann, Andreas and Venugopal, Ashish and Paulik, Matthias and Vogel, Stephan},
  title     = {The Syntax Augmented {MT} {(SAMT)} System at the Shared Task for the 2007 {ACL} Workshop on Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {216--219},
  year      = {2007}
}

@article{och2003systematic,
  author    = {Och, Franz Josef and Ney, Hermann},
  title     = {A Systematic Comparison of Various Statistical Alignment Models},
  journal   = {Computational Linguistics},
  year      = {2003},
  volume    = {29},
  number    = {1},
  pages     = {19--51}
}

@inproceedings{zoph2016simple,
  author    = {Zoph, Barret and Vaswani, Ashish and May, Jonathan and Knight, Kevin},
  title     = {Simple, Fast Noise-Contrastive Estimation for Large {RNN} Vocabularies},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  pages     = {1217--1222},
  year      = {2016}
}

@inproceedings{Ottfairseq,
  author    = {Ott, Myle and Edunov, Sergey and Baevski, Alexei and Fan, Angela and
               Gross, Sam and Ng, Nathan and Grangier, David and Auli, Michael},
  title     = {fairseq: {A} Fast, Extensible Toolkit for Sequence Modeling},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  pages     = {48--53},
  year      = {2019}
}

@inproceedings{Vaswani2018Tensor2TensorFN,
  author    = {Vaswani, Ashish and Bengio, Samy and Brevdo, Eugene and Chollet, Fran{\c{c}}ois and
               Gomez, Aidan N. and Gouws, Stephan and Jones, Llion and Kaiser, Lukasz and
               Kalchbrenner, Nal and Parmar, Niki and Sepassi, Ryan and Shazeer, Noam and
               Uszkoreit, Jakob},
  title     = {{Tensor2Tensor} for Neural Machine Translation},
  publisher = {Association for Machine Translation in the Americas},
  pages     = {193--199},
  year      = {2018}
}

@inproceedings{KleinOpenNMT,
  author    = {Klein, Guillaume and Kim, Yoon and Deng, Yuntian and Senellart, Jean and Rush, Alexander M.},
  title     = {{OpenNMT}: Open-Source Toolkit for Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {67--72},
  year      = {2017}
}

% Luong and Manning: hybrid word-character models for open-vocabulary NMT.
% The page range was missing (every sibling inproceedings entry carries one).
% NOTE(review): pages taken from the ACL 2016 long-paper proceedings; confirm against the ACL Anthology.
@inproceedings{luong2016acl_hybrid,
  author    = {Minh-Thang Luong and
               Christopher D. Manning},
  title     = {Achieving Open Vocabulary Neural Machine Translation with Hybrid Word-Character
               Models},
  pages     = {1054--1063},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2016}
}

% Zhang et al.: the THUMT open-source NMT toolkit (arXiv preprint, listed under CoRR).
% Names are written in the unambiguous "Last, First" form; the parsed names are unchanged.
@article{ZhangTHUMT,
  author  = {Zhang, Jiacheng and Ding, Yanzhuo and Shen, Shiqi and Cheng, Yong and
             Sun, Maosong and Luan, Huan-Bo and Liu, Yang},
  title   = {{THUMT:} An Open Source Toolkit for Neural Machine Translation},
  journal = {CoRR},
  volume  = {abs/1706.06415},
  year    = {2017}
}

% Junczys-Dowmunt et al.: the Marian NMT framework written in C++ (system demonstration paper).
% Names are written in the unambiguous "Last, First" form; the parsed names are unchanged.
@inproceedings{JunczysMarian,
  author    = {Junczys-Dowmunt, Marcin and Grundkiewicz, Roman and Dwojak, Tomasz and
               Hoang, Hieu and Heafield, Kenneth and Neckermann, Tom and
               Seide, Frank and Germann, Ulrich and Aji, Alham Fikri and
               Bogoychev, Nikolay and Martins, Andr{\'{e}} F. T. and Birch, Alexandra},
  title     = {Marian: Fast Neural Machine Translation in {C++}},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {116--121},
  year      = {2018}
}

% Hieber et al.: the Sockeye NMT toolkit (arXiv preprint, listed under CoRR).
% Names are written in the unambiguous "Last, First" form; the parsed names are unchanged.
@article{hieber2017sockeye,
  author  = {Hieber, Felix and Domhan, Tobias and Denkowski, Michael and Vilar, David and
             Sokolov, Artem and Clifton, Ann and Post, Matt},
  title   = {Sockeye: {A} Toolkit for Neural Machine Translation},
  journal = {CoRR},
  volume  = {abs/1712.05690},
  year    = {2017}
}

% Wang et al.: the CytonMT NMT toolkit implemented in C++ (system demonstration paper).
% Venue corrected: published in the EMNLP 2018 demonstrations (pp. 133--138),
% not at the ACL annual meeting. The toolkit name is brace-protected against sentence-casing.
@inproceedings{WangCytonMT,
  author    = {Xiaolin Wang and
               Masao Utiyama and
               Eiichiro Sumita},
  title     = {{CytonMT}: an Efficient Neural Machine Translation Open-source Toolkit
               Implemented in {C++}},
  pages     = {133--138},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2018}
}

% Kuchaiev et al.: the OpenSeq2Seq toolkit for distributed, mixed-precision
% sequence-to-sequence training (arXiv preprint, listed under CoRR).
% The toolkit name is brace-protected so sentence-casing styles cannot render it as "Openseq2seq".
@article{DBLP:journals/corr/abs-1805-10387,
  author    = {Oleksii Kuchaiev and
               Boris Ginsburg and
               Igor Gitman and
               Vitaly Lavrukhin and
               Carl Case and
               Paulius Micikevicius},
  title     = {{OpenSeq2Seq}: extensible toolkit for distributed and mixed precision
               training of sequence-to-sequence models},
  journal   = {CoRR},
  volume    = {abs/1805.10387},
  year      = {2018}
}

% Caglayan et al.: the NMTPY toolkit for NMT systems (journal article).
% Names are written in the unambiguous "Last, First" form; the parsed names are unchanged.
@article{nmtpy2017,
  author  = {Caglayan, Ozan and Garc{\'{\i}}a-Mart{\'{\i}}nez, Mercedes and
             Bardet, Adrien and Aransa, Walid and Bougares, Fethi and
             Barrault, Lo{\"{\i}}c},
  title   = {{NMTPY:} {A} Flexible Toolkit for Advanced Neural Machine Translation
             Systems},
  journal = {The Prague Bulletin of Mathematical Linguistics},
  volume  = {109},
  pages   = {15--28},
  year    = {2017}
}
%%%%% chapter appendix-A------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
